blob: a143302a4e0a50227a3c7d83f8e956b29abf85bc [file] [log] [blame]
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef FUZZTEST_FUZZTEST_INTERNAL_DOMAIN_H_
#define FUZZTEST_FUZZTEST_INTERNAL_DOMAIN_H_
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"
#include "absl/random/bit_gen_ref.h"
#include "absl/random/random.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "absl/types/span.h"
#include "./fuzztest/internal/any.h"
#include "./fuzztest/internal/coverage.h"
#include "./fuzztest/internal/logging.h"
#include "./fuzztest/internal/meta.h"
#include "./fuzztest/internal/regexp.h"
#include "./fuzztest/internal/serialization.h"
#include "./fuzztest/internal/table_of_recent_compares.h"
#include "./fuzztest/internal/type_support.h"
namespace fuzztest {
template <typename>
class Domain;
} // namespace fuzztest
namespace fuzztest::internal {
// We use this to get a nice compiler error when `T` and `U` don't match instead
// of just "false".
template <typename T, typename U>
constexpr void CheckIsSame() {
static_assert(std::is_same_v<std::remove_const_t<T>, std::remove_const_t<U>>);
}
// Corpus value type used by Domain<T> template, regardless of T.
using GenericDomainCorpusType = CopyableAny;
// Base class for all domains.
// It is "untyped" in that it erases all value_type/corpus_type inputs and
// outputs. This allows code sharing of the runtime.
// All the `Untyped[Name]` functions implement the same API as the `[Name]`
// function but marshalls the inputs and output through the generic types.
class UntypedDomainInterface {
public:
virtual ~UntypedDomainInterface() {}
virtual std::unique_ptr<UntypedDomainInterface> Clone() const = 0;
virtual GenericDomainCorpusType UntypedInit(absl::BitGenRef) = 0;
virtual void UntypedMutate(GenericDomainCorpusType& val, absl::BitGenRef prng,
bool only_shrink) = 0;
virtual void UntypedUpdateMemoryDictionary(
const GenericDomainCorpusType& val) = 0;
virtual std::optional<GenericDomainCorpusType> UntypedParseCorpus(
const IRObject& obj) const = 0;
virtual IRObject UntypedSerializeCorpus(
const GenericDomainCorpusType& v) const = 0;
virtual uint64_t UntypedCountNumberOfFields(
const GenericDomainCorpusType&) = 0;
virtual uint64_t UntypedMutateSelectedField(GenericDomainCorpusType&,
absl::BitGenRef, bool,
uint64_t) = 0;
virtual MoveOnlyAny UntypedGetValue(
const GenericDomainCorpusType& v) const = 0;
// UntypedPrintCorpusValue is special in that it has an extra parameter
// `tuple_elem`. This is used to instruct the `std::tuple` domain to print one
// particular element instead of the whole tuple. This is what the runtime
// uses to print out the arguments when a counterexample is found.
// In the `std::tuple` case, it also returns the number of elements in the
// tuple.
virtual int UntypedPrintCorpusValue(
const GenericDomainCorpusType& val, absl::FormatRawSink out,
internal::PrintMode mode,
std::optional<int> tuple_elem = std::nullopt) const = 0;
// Returns true if the corpus value should be serialized directly as a string
// instead of in the FuzzTest format.
virtual bool IsDirectlySerializable() const = 0;
};
// A typed subinterface that provides the methods to handle `value_type`
// inputs/outputs. Some callers require the actual `value_type`.
template <typename ValueType>
class TypedDomainInterface : public UntypedDomainInterface {
public:
virtual ValueType TypedGetValue(const GenericDomainCorpusType& v) const = 0;
virtual std::optional<GenericDomainCorpusType> TypedFromValue(
const ValueType& v) const = 0;
MoveOnlyAny UntypedGetValue(const GenericDomainCorpusType& v) const final {
return MoveOnlyAny(std::in_place_type<ValueType>, TypedGetValue(v));
}
};
template <typename Derived,
typename ValueType = ExtractTemplateParameter<0, Derived>>
class DomainBase : public TypedDomainInterface<ValueType> {
public:
DomainBase() {
// Check that the interface of `Derived` matches the requirements for a
// domain implementation. We check these inside the constructor of
// `DomainBase`, where `Derived` is already fully defined. If we try to
// check them at class scope we would see an incomplete `Derived` class and
// the checks would not work.
// Has value_type.
using CheckValueType = typename Derived::value_type;
static_assert(std::is_same_v<ValueType, CheckValueType>);
if constexpr (Derived::has_custom_corpus_type) {
// The type of values that are mutated and stored internally in the
// "corpus" may be different from the type of values produced by the
// domain. For example, the corpus_type of InRegexp is a custom data
// structure representing a path through a state machine, but the domain
// produces values of type std::string.
using CheckCorpusType [[maybe_unused]] = typename Derived::corpus_type;
} else {
CheckIsSame<typename Derived::value_type, corpus_type_t<Derived>>();
}
}
std::unique_ptr<UntypedDomainInterface> Clone() const final {
return std::make_unique<Derived>(derived());
}
GenericDomainCorpusType UntypedInit(absl::BitGenRef ref) final {
return GenericDomainCorpusType(std::in_place_type<corpus_type_t<Derived>>,
derived().Init(ref));
}
void UntypedMutate(GenericDomainCorpusType& val, absl::BitGenRef prng,
bool only_shrink) final {
derived().Mutate(val.GetAs<corpus_type_t<Derived>>(), prng, only_shrink);
}
void UntypedUpdateMemoryDictionary(const GenericDomainCorpusType& val) final {
derived().UpdateMemoryDictionary(val.GetAs<corpus_type_t<Derived>>());
}
ValueType TypedGetValue(const GenericDomainCorpusType& v) const final {
return derived().GetValue(v.GetAs<corpus_type_t<Derived>>());
}
std::optional<GenericDomainCorpusType> TypedFromValue(
const ValueType& v) const final {
if (auto c = derived().FromValue(v)) {
return GenericDomainCorpusType(std::in_place_type<corpus_type_t<Derived>>,
*std::move(c));
} else {
return std::nullopt;
}
}
std::optional<GenericDomainCorpusType> UntypedParseCorpus(
const IRObject& obj) const final {
if (auto res = derived().ParseCorpus(obj)) {
return GenericDomainCorpusType(std::in_place_type<corpus_type_t<Derived>>,
*std::move(res));
} else {
return std::nullopt;
}
}
IRObject UntypedSerializeCorpus(
const GenericDomainCorpusType& v) const final {
return derived().SerializeCorpus(
v.template GetAs<corpus_type_t<Derived>>());
}
bool IsDirectlySerializable() const final {
return Derived::is_directly_serializable;
}
uint64_t UntypedCountNumberOfFields(const GenericDomainCorpusType& v) final {
return derived().CountNumberOfFields(v.GetAs<corpus_type_t<Derived>>());
}
uint64_t UntypedMutateSelectedField(GenericDomainCorpusType& v,
absl::BitGenRef prng, bool only_shrink,
uint64_t selected_field_index) final {
return derived().MutateSelectedField(v.GetAs<corpus_type_t<Derived>>(),
prng, only_shrink,
selected_field_index);
}
int UntypedPrintCorpusValue(const GenericDomainCorpusType& val,
absl::FormatRawSink out, internal::PrintMode mode,
std::optional<int> tuple_elem) const override {
FUZZTEST_INTERNAL_CHECK(
!tuple_elem.has_value(),
"No tuple element should be specified for this override.");
internal::PrintValue(derived(), val.GetAs<corpus_type_t<Derived>>(), out,
mode);
return -1;
}
// Default GetValue and FromValue functions for !has_custom_corpus_type
// domains.
ValueType GetValue(const ValueType& v) const {
static_assert(!Derived::has_custom_corpus_type);
return v;
}
std::optional<ValueType> FromValue(const ValueType& v) const {
static_assert(!Derived::has_custom_corpus_type);
return v;
}
template <typename D = Derived>
std::optional<corpus_type_t<D>> ParseCorpus(const IRObject& obj) const {
static_assert(!D::has_custom_corpus_type);
return obj.ToCorpus<corpus_type_t<D>>();
}
template <typename D = Derived>
IRObject SerializeCorpus(const corpus_type_t<D>& v) const {
static_assert(!D::has_custom_corpus_type);
return IRObject::FromCorpus(v);
}
template <typename D = Derived>
void UpdateMemoryDictionary(const corpus_type_t<D>& val) {}
template <typename D = Derived>
uint64_t CountNumberOfFields(const corpus_type_t<D>&) {
return 0;
}
template <typename D = Derived>
uint64_t MutateSelectedField(corpus_type_t<D>&, absl::BitGenRef, bool,
uint64_t) {
return 0;
}
static constexpr bool has_custom_corpus_type = false;
// Indicates that the corpus value can be serialized directly as a string,
// without using the intermediate serialization format.
static constexpr bool is_directly_serializable = false;
private:
Derived& derived() { return static_cast<Derived&>(*this); }
const Derived& derived() const { return static_cast<const Derived&>(*this); }
};
enum class IncludeEnd { kYes, kNo };
// For cases where the type is a container, choose one of the elements in the
// container.
template <typename Container>
auto ChoosePosition(Container& val, IncludeEnd include_end,
absl::BitGenRef prng) {
size_t i = absl::Uniform<size_t>(
prng, 0, include_end == IncludeEnd::kYes ? val.size() + 1 : val.size());
return std::next(val.begin(), i);
}
// Given a parameter pack of functions `f`, run exactly one of the functions.
template <typename... F>
void RunOne(absl::BitGenRef prng, F... f) {
ApplyIndex<sizeof...(F)>([&](auto... I) {
int i = absl::Uniform<int>(prng, 0, sizeof...(F));
((i == I ? (void)f() : (void)0), ...);
});
}
template <typename T, size_t N, typename F>
T ChooseOneOr(const T (&values)[N], absl::BitGenRef prng, F f) {
int i = absl::Uniform<int>(absl::IntervalClosedClosed, prng, 0, N);
return i == N ? f() : values[i];
}
// Random bit flip: minimal mutation to a field, it will converge
// the hamming distance of a value to its target in constant steps.
template <typename T>
void RandomBitFlip(absl::BitGenRef prng, T& val, size_t range) {
using U = MakeUnsignedT<T>;
U u = static_cast<U>(val);
u ^= U{1} << absl::Uniform<int>(prng, 0, range);
val = static_cast<T>(u);
}
// BitWidth of the value of val, specially handle uint12 and int128, because
// they don't have overloads in absl::bit_width.
template <typename T>
size_t BitWidth(T val) {
if constexpr (std::is_same_v<T, absl::int128> ||
std::is_same_v<T, absl::uint128>) {
auto val_unsigned = MakeUnsignedT<T>(val);
size_t res = 0;
while (val_unsigned >>= 1) ++res;
return res;
} else {
return absl::bit_width(static_cast<MakeUnsignedT<T>>(val));
}
}
// Trying to copy a segment from `from` to `to`, with given offsets.
// Invalid offset that cause boundary check failures will make this function
// return false. `is_self` tells the function whether `from` and `to` points
// to the same object. Returns `true` iff copying results in `to` being mutated.
template <bool is_self, typename ContainerT>
bool CopyPart(const ContainerT& from, ContainerT& to,
size_t from_segment_start_offset, size_t from_segment_size,
size_t to_segment_start_offset, size_t max_size) {
bool mutated = false;
if (from_segment_size == 0) return mutated;
size_t from_segment_end_offset =
from_segment_start_offset + from_segment_size;
size_t to_segment_end_offset = to_segment_start_offset + from_segment_size;
if (from_segment_start_offset >= from.size() ||
to_segment_start_offset > to.size() ||
from_segment_end_offset > from.size() || to_segment_end_offset > max_size)
return mutated;
if (to_segment_end_offset > to.size()) {
mutated = true;
to.resize(to_segment_end_offset);
} else {
if (!std::equal(std::next(to.begin(), to_segment_start_offset),
std::next(to.begin(), to_segment_end_offset),
std::next(from.begin(), from_segment_start_offset),
std::next(from.begin(), from_segment_end_offset)))
mutated = true;
}
if (!mutated) return mutated;
if constexpr (!is_self) {
std::copy(std::next(from.begin(), from_segment_start_offset),
std::next(from.begin(), from_segment_end_offset),
std::next(to.begin(), to_segment_start_offset));
} else {
ContainerT tmp(std::next(from.begin(), from_segment_start_offset),
std::next(from.begin(), from_segment_end_offset));
std::copy(tmp.begin(), tmp.end(),
std::next(to.begin(), to_segment_start_offset));
}
return mutated;
}
// Trying to insert a segment from `from` to `to`, with given offsets.
// Invalid offset that cause boundary check failures will make this function
// return false. `is_self` tells the function whether `from` and `to` points
// to the same object. Returns `true` iff insertion results in `to` being
// mutated.
template <bool is_self, typename ContainerT>
bool InsertPart(const ContainerT& from, ContainerT& to,
size_t from_segment_start_offset, size_t from_segment_size,
size_t to_segment_start_offset, size_t max_size) {
bool mutated = false;
if (from_segment_size == 0) return mutated;
size_t from_segment_end_offset =
from_segment_start_offset + from_segment_size;
if (from_segment_start_offset >= from.size() ||
to_segment_start_offset > to.size() ||
from_segment_end_offset > from.size() ||
from_segment_size > max_size - to.size())
return mutated;
mutated = true;
if constexpr (!is_self) {
to.insert(std::next(to.begin(), to_segment_start_offset),
std::next(from.begin(), from_segment_start_offset),
std::next(from.begin(), from_segment_end_offset));
} else {
ContainerT tmp(std::next(from.begin(), from_segment_start_offset),
std::next(from.begin(), from_segment_end_offset));
to.insert(std::next(to.begin(), to_segment_start_offset), tmp.begin(),
tmp.end());
}
return mutated;
}
template <typename T>
T SampleFromUniformRange(absl::BitGenRef prng, T min, T max) {
return absl::Uniform(absl::IntervalClosedClosed, prng, min, max);
}
// Randomly apply Randomwalk or Uniform distribution or dictionary to mutate
// the val:
// RandomWalk: converge the absolute distance of a value to its target
// more efficiently.
// Uniform Distribution: go across some non-linear boundary that cannot
// be solved by bit flipping or randomwalk.
// Dictionary: if applicable, choose randomly from the dictionary. if
// dictionary fails to mutate, fall back to uniform.
template <unsigned char RANGE, typename T, typename IntegerDictionaryT>
void RandomWalkOrUniformOrDict(absl::BitGenRef prng, T& val, T min, T max,
const IntegerDictionaryT& temporary_dict,
const IntegerDictionaryT& permanent_dict,
std::optional<T>& permanent_dict_candidate) {
constexpr bool is_memory_dictionary_compatible_integer =
std::numeric_limits<T>::is_integer &&
(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
const bool can_use_memory_dictionary =
is_memory_dictionary_compatible_integer &&
GetExecutionCoverage() != nullptr;
const int action_count = 2 + can_use_memory_dictionary;
int action = absl::Uniform(prng, 0, action_count);
// Random walk.
if (action-- == 0) {
if (max / 2 - min / 2 <= RANGE) {
val = SampleFromUniformRange(prng, min, max);
} else {
T lo = min;
T hi = max;
if (val > lo + RANGE) lo = val - RANGE;
if (val < hi - RANGE) hi = val + RANGE;
val = SampleFromUniformRange(prng, lo, hi);
}
return;
}
// Random choose.
if (action-- == 0) {
val = SampleFromUniformRange(prng, min, max);
return;
}
// Dictionary
if constexpr (is_memory_dictionary_compatible_integer) {
if (can_use_memory_dictionary) {
if (action-- == 0) {
RunOne(
prng,
[&] {
if (temporary_dict.IsEmpty()) {
val = SampleFromUniformRange(prng, min, max);
} else {
val = temporary_dict.GetRandomSavedEntry(prng);
permanent_dict_candidate = val;
}
},
[&] {
if (permanent_dict.IsEmpty()) {
val = SampleFromUniformRange(prng, min, max);
} else {
val = permanent_dict.GetRandomSavedEntry(prng);
}
},
[&] {
auto entry = IntegerDictionary<T>::GetRandomTORCEntry(
val, prng,
GetExecutionCoverage()->GetTablesOfRecentCompares(), min,
max);
if (entry.has_value()) {
val = *entry;
} else {
val = SampleFromUniformRange(prng, min, max);
}
});
}
}
}
}
inline size_t GetOrGuessPositionHint(std::optional<size_t> position_hint,
size_t max, absl::BitGenRef prng) {
if (position_hint.has_value()) {
return *position_hint;
} else {
return ChooseOffset(max, prng);
}
}
// Try to copy `dict_entry.value` to `val`:
// If `dict_entry` has a position hint, copy to that offset; otherwise,
// guess a position hint. Return the copied-to position if mutation succeed,
// otherwise std::nullopt. Return true iff `val` is successfully mutated.
template <bool is_self, typename ContainerT>
bool CopyFromDictionaryEntry(const DictionaryEntry<ContainerT>& dict_entry,
absl::BitGenRef prng, ContainerT& val,
size_t max_size) {
if (dict_entry.value.size() >= max_size) return false;
size_t position_hint = GetOrGuessPositionHint(
dict_entry.position_hint,
std::min(val.size(), max_size - dict_entry.value.size()), prng);
return CopyPart<is_self>(dict_entry.value, val, 0, dict_entry.value.size(),
position_hint, max_size);
}
// The same as above, but set `permanent_dict_candidate` iff successfully
// mutated.
template <bool is_self, typename ContainerT>
bool CopyFromDictionaryEntry(
const DictionaryEntry<ContainerT>& dict_entry, absl::BitGenRef prng,
ContainerT& val, size_t max_size,
std::optional<DictionaryEntry<ContainerT>>& permanent_dict_candidate) {
if (dict_entry.value.size() >= max_size) return false;
size_t position_hint = GetOrGuessPositionHint(
dict_entry.position_hint,
std::min(val.size(), max_size - dict_entry.value.size()), prng);
bool mutated =
CopyPart<is_self>(dict_entry.value, val, 0, dict_entry.value.size(),
position_hint, max_size);
if (mutated) {
permanent_dict_candidate = {position_hint, val};
}
return mutated;
}
// Try to insert `dict_entry.value` to `val`:
// If `dict_entry` has a position hint, copy to that offset; otherwise,
// guess a position hint. Return the inserted-to position if mutation succeed,
// otherwise std::nullopt. Return true iff successfully mutated.
template <bool is_self, typename ContainerT>
bool InsertFromDictionaryEntry(const DictionaryEntry<ContainerT>& dict_entry,
absl::BitGenRef prng, ContainerT& val,
size_t max_size) {
if (dict_entry.value.size() >= max_size) return false;
size_t position_hint =
GetOrGuessPositionHint(dict_entry.position_hint, val.size(), prng);
return InsertPart<is_self>(dict_entry.value, val, 0, dict_entry.value.size(),
position_hint, max_size);
}
// The same as above, but set `permanent_dict_candidate` iff successfully
// mutated.
template <bool is_self, typename ContainerT>
bool InsertFromDictionaryEntry(
const DictionaryEntry<ContainerT>& dict_entry, absl::BitGenRef prng,
ContainerT& val, size_t max_size,
std::optional<DictionaryEntry<ContainerT>>& permanent_dict_candidate) {
if (dict_entry.value.size() >= max_size) return false;
size_t position_hint =
GetOrGuessPositionHint(dict_entry.position_hint, val.size(), prng);
bool mutated =
InsertPart<is_self>(dict_entry.value, val, 0, dict_entry.value.size(),
position_hint, max_size);
if (mutated) {
permanent_dict_candidate = {position_hint, val};
}
return mutated;
}
template <typename ContainerT>
bool ApplyDictionaryMutationAndSavePermanentCandidate(
ContainerT& val, const DictionaryEntry<ContainerT>& entry,
absl::BitGenRef prng,
std::optional<DictionaryEntry<ContainerT>>& permanent_dict_candidate,
size_t max_size) {
bool mutated = false;
RunOne(
prng,
// Temporary dictionary replace contents from position hint.
[&] {
mutated = CopyFromDictionaryEntry<false>(entry, prng, val, max_size,
permanent_dict_candidate);
},
// Temporary dictionary insert into position hint.
[&] {
mutated = InsertFromDictionaryEntry<false>(entry, prng, val, max_size,
permanent_dict_candidate);
});
return mutated;
}
// Replace or insert the dictionary contents to position hints.
template <typename ContainerT>
bool MemoryDictionaryMutation(
ContainerT& val, absl::BitGenRef prng,
ContainerDictionary<ContainerT>& temporary_dict,
ContainerDictionary<ContainerT>& manual_dict,
ContainerDictionary<ContainerT>& permanent_dict,
std::optional<DictionaryEntry<ContainerT>>& permanent_dict_candidate,
size_t max_size) {
bool mutated = false;
const bool can_use_manual_dictionary = !manual_dict.IsEmpty();
const bool can_use_temporary_dictionary = !temporary_dict.IsEmpty();
const bool can_use_permanent_dictionary = !permanent_dict.IsEmpty();
const int dictionary_action_count = 1 + can_use_manual_dictionary +
can_use_temporary_dictionary +
can_use_permanent_dictionary;
int dictionary_action = absl::Uniform(prng, 0, dictionary_action_count);
if (can_use_temporary_dictionary && dictionary_action-- == 0) {
mutated = ApplyDictionaryMutationAndSavePermanentCandidate(
val, temporary_dict.GetRandomSavedEntry(prng), prng,
permanent_dict_candidate, max_size);
}
if (can_use_manual_dictionary && dictionary_action-- == 0) {
mutated = ApplyDictionaryMutationAndSavePermanentCandidate(
val, manual_dict.GetRandomSavedEntry(prng), prng,
permanent_dict_candidate, max_size);
}
if (can_use_permanent_dictionary && dictionary_action-- == 0) {
RunOne(
prng,
// Permanent dictionary replace contents from position hint.
[&] {
mutated = CopyFromDictionaryEntry<false>(
permanent_dict.GetRandomSavedEntry(prng), prng, val, max_size);
},
// Permanent dictionary insert into position hint.
[&] {
mutated = InsertFromDictionaryEntry<false>(
permanent_dict.GetRandomSavedEntry(prng), prng, val, max_size);
});
}
// Pick entries from tables_of_recent_compares(TORC) directly.
if (dictionary_action-- == 0) {
auto dictionary_entry = ContainerDictionary<ContainerT>::GetRandomTORCEntry(
val, prng, GetExecutionCoverage()->GetTablesOfRecentCompares());
if (dictionary_entry.has_value()) {
mutated = ApplyDictionaryMutationAndSavePermanentCandidate(
val, *dictionary_entry, prng, permanent_dict_candidate, max_size);
}
}
return mutated;
}
// Randomly erases a contiguous chunk of at least 1 and at most half the
// elements in `val`. The final size of `val` will be at least `min_size`.
template <typename ContainerT>
void EraseRandomChunk(ContainerT& val, absl::BitGenRef prng, size_t min_size) {
if (val.size() <= min_size) return;
size_t chunk_size =
absl::Uniform(absl::IntervalClosedClosed, prng, size_t{1},
std::min(val.size() - min_size, val.size() >> 1));
size_t chunk_offset = ChooseOffset(val.size() - chunk_size, prng);
auto it_start = std::next(val.begin(), chunk_offset);
auto it_end = std::next(it_start, chunk_size);
val.erase(it_start, it_end);
}
// Randomly inserts `new_element_val` at least 1 and at most 15 times at a
// random position in `val`. The final size of `val` will be at most `max_size`.
template <typename ContainerT, typename T>
void InsertRandomChunk(ContainerT& val, absl::BitGenRef prng, size_t max_size,
T new_element_val) {
if (val.size() >= max_size) return;
size_t grows = absl::Uniform(absl::IntervalClosedClosed, prng, size_t{1},
std::min(max_size - val.size(), size_t{15}));
size_t grow_offset = ChooseOffset(val.size(), prng);
while (grows--) {
val.insert(std::next(val.begin(), grow_offset), new_element_val);
}
}
// Helper serialization functions for common patterns: optional/variant/tuple.
template <typename... Domain>
IRObject SerializeWithDomainVariant(
const std::tuple<Domain...>& domains,
const std::variant<corpus_type_t<Domain>...>& v) {
IRObject obj;
auto& subs = obj.MutableSubs();
subs.push_back(IRObject::FromCorpus(v.index()));
Switch<sizeof...(Domain)>(v.index(), [&](auto I) {
subs.push_back(std::get<I>(domains).SerializeCorpus(std::get<I>(v)));
});
return obj;
}
template <typename... Domain,
typename ReturnT = std::variant<corpus_type_t<Domain>...>>
std::optional<ReturnT> ParseWithDomainVariant(
const std::tuple<Domain...>& domains, const IRObject& obj) {
auto subs = obj.Subs();
if (!subs || subs->size() != 2) return std::nullopt;
auto alternative = (*subs)[0].GetScalar<size_t>();
if (!alternative || *alternative >= sizeof...(Domain)) return std::nullopt;
return Switch<sizeof...(Domain)>(
*alternative, [&](auto I) -> std::optional<ReturnT> {
auto inner_corpus = std::get<I>(domains).ParseCorpus((*subs)[1]);
if (!inner_corpus) return std::nullopt;
return ReturnT(std::in_place_index<I>, *std::move(inner_corpus));
});
}
template <typename Domain>
auto SerializeWithDomainOptional(
const Domain& domain,
const std::variant<std::monostate, corpus_type_t<Domain>>& v) {
IRObject obj;
auto& subs = obj.MutableSubs();
subs.push_back(IRObject(v.index()));
if (v.index() == 1) {
subs.push_back(domain.SerializeCorpus(std::get<1>(v)));
}
return obj;
}
template <typename Domain, typename ReturnT = std::variant<
std::monostate, corpus_type_t<Domain>>>
std::optional<ReturnT> ParseWithDomainOptional(const Domain& domain,
const IRObject& obj) {
auto subs = obj.Subs();
if (!subs || subs->empty()) return std::nullopt;
auto index = (*subs)[0].GetScalar<size_t>();
if (index == 0) {
if (subs->size() != 1) return std::nullopt;
return ReturnT();
} else if (index == 1) {
if (subs->size() != 2) return std::nullopt;
auto inner_corpus = domain.ParseCorpus((*subs)[1]);
if (!inner_corpus) return std::nullopt;
return ReturnT(std::in_place_index<1>, *std::move(inner_corpus));
} else {
return std::nullopt;
}
}
template <typename... Domain>
auto SerializeWithDomainTuple(
const std::tuple<Domain...>& domains,
const std::tuple<corpus_type_t<Domain>...>& corpus) {
// Flatten singleton tuples to enable direct serialization of
// directly-serializable domains nested in singleton tuples.
if constexpr (sizeof...(Domain) == 1) {
return std::get<0>(domains).SerializeCorpus(std::get<0>(corpus));
}
IRObject obj;
auto& subs = obj.MutableSubs();
ApplyIndex<sizeof...(Domain)>([&](auto... I) {
(subs.push_back(std::get<I>(domains).SerializeCorpus(std::get<I>(corpus))),
...);
});
return obj;
}
// Parse a corpus given a tuple of domains, skipping the first `skip` subs.
template <typename... Domain>
std::optional<std::tuple<corpus_type_t<Domain>...>> ParseWithDomainTuple(
const std::tuple<Domain...>& domains, const IRObject& obj, int skip = 0) {
auto parse_subs = [&](absl::Span<const IRObject> subs) {
return ApplyIndex<sizeof...(Domain)>([&](auto... I) {
return [](auto... opts) {
return (!opts || ...)
? std::nullopt
: std::optional(std::tuple<corpus_type_t<Domain>...>{
*std::move(opts)...});
}(std::get<I>(domains).ParseCorpus((subs)[I + skip])...);
});
};
// Reverse the flattening of singleton tuples done by
// SerializeWithDomainTuple() by treating `obj` as a subobject, thus
// effectively wrapping the parsed result in a tuple.
if (sizeof...(Domain) == 1 && skip == 0) {
return parse_subs({obj});
}
auto subs = obj.Subs();
if (!subs || subs->size() != sizeof...(Domain) + skip) return std::nullopt;
return parse_subs(*subs);
}
template <typename T, typename = void>
class ArbitraryImpl {
static_assert(always_false<T>,
"=> Type not supported yet. Consider filing an issue."
);
};
// Capture const values. This is a workaround in order to enable
// Arbitrary<std::map<std::string, int>>() and similar container domains that
// require Arbitrary<std::pair< _const_ std::string, int>>() to be defined,
// which in turn requires Arbitrary<const std::string>() to be defined.
template <typename T>
class ArbitraryImpl<const T> : public ArbitraryImpl<T> {};
// For monostate types with a default constructor, just give the single value.
template <typename T>
class ArbitraryImpl<T, std::enable_if_t<is_monostate_v<T>>>
: public DomainBase<ArbitraryImpl<T>> {
public:
using value_type = T;
value_type Init(absl::BitGenRef) { return value_type{}; }
void Mutate(value_type&, absl::BitGenRef, bool) {}
auto GetPrinter() const { return MonostatePrinter{}; }
};
// REQUIRES: |target| < |val|
template <typename T>
T ShrinkTowards(absl::BitGenRef prng, T val, T target) {
if (val < target) {
return absl::Uniform(absl::IntervalOpenClosed, prng, val, target);
} else {
return absl::Uniform(absl::IntervalClosedOpen, prng, target, val);
}
}
template <>
class ArbitraryImpl<bool> : public DomainBase<ArbitraryImpl<bool>> {
public:
using value_type = bool;
value_type Init(absl::BitGenRef prng) {
return static_cast<bool>(absl::Uniform(prng, 0, 2));
}
void Mutate(value_type& val, absl::BitGenRef, bool only_shrink) {
if (only_shrink) {
val = false;
} else {
val = !val;
}
}
auto GetPrinter() const { return IntegralPrinter{}; }
};
template <typename T>
class ArbitraryImpl<T, std::enable_if_t<!std::is_const_v<T> &&
std::numeric_limits<T>::is_integer>>
: public DomainBase<ArbitraryImpl<T>> {
public:
using value_type = T;
static constexpr bool is_memory_dictionary_compatible_v =
sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8;
using IntegerDictionaryT =
std::conditional_t<is_memory_dictionary_compatible_v,
IntegerDictionary<T>, bool>;
value_type Init(absl::BitGenRef prng) {
const auto choose_from_all = [&] {
return absl::Uniform(absl::IntervalClosedClosed, prng,
std::numeric_limits<T>::min(),
std::numeric_limits<T>::max());
};
if constexpr (sizeof(T) == 1) {
return choose_from_all();
} else {
static constexpr T special[] = {
T{0}, T{1},
// For some types, ~T{} is promoted to int. Convert back to T.
static_cast<T>(~T{}),
std::numeric_limits<T>::is_signed
? std::numeric_limits<T>::max()
: std::numeric_limits<T>::max() >> 1};
return ChooseOneOr(special, prng, choose_from_all);
}
}
void Mutate(value_type& val, absl::BitGenRef prng, bool only_shrink) {
permanent_dict_candidate_ = std::nullopt;
if (only_shrink) {
if (val == 0) return;
val = ShrinkTowards(prng, val, T{0});
return;
}
const T prev = val;
do {
// Randomly apply 4 kinds of mutations with equal probabilities.
// Use permanent_dictionary_ or temporary_dictionary_ with equal
// probabilities.
if (absl::Bernoulli(prng, 0.25)) {
RandomBitFlip(prng, val, sizeof(T) * 8);
} else {
RandomWalkOrUniformOrDict<5>(prng, val, std::numeric_limits<T>::min(),
std::numeric_limits<T>::max(),
temporary_dict_, permanent_dict_,
permanent_dict_candidate_);
}
// Make sure Mutate really mutates.
} while (val == prev);
}
void UpdateMemoryDictionary(const value_type& val) {
if constexpr (is_memory_dictionary_compatible_v) {
if (GetExecutionCoverage() != nullptr) {
temporary_dict_.MatchEntriesFromTableOfRecentCompares(
val, GetExecutionCoverage()->GetTablesOfRecentCompares(),
std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
if (permanent_dict_candidate_.has_value() &&
permanent_dict_.Size() < kPermanentDictMaxSize) {
permanent_dict_.AddEntry(std::move(*permanent_dict_candidate_));
permanent_dict_candidate_ = std::nullopt;
}
}
}
}
auto GetPrinter() const { return IntegralPrinter{}; }
private:
// Matched snapshots from table of recent compares.
// It's the "unverified" dictionary entries: the mutated
// value matched something in this snapshot, but not sure
// if it will lead to new coverage.
IntegerDictionaryT temporary_dict_;
// Set of dictionary entries from previous `temporary_dict_`
// that leads to new coverage. This is based on a heuristic
// that such entries may lead to interesting behaviors even
// after the first new coverage it triggered.
IntegerDictionaryT permanent_dict_;
std::optional<T> permanent_dict_candidate_;
static constexpr size_t kPermanentDictMaxSize = 512;
};
template <typename T>
class ArbitraryImpl<T, std::enable_if_t<std::is_floating_point_v<T>>>
: public DomainBase<ArbitraryImpl<T>> {
public:
using value_type = T;
value_type Init(absl::BitGenRef prng) {
const T special[] = {
T{0.0}, T{-0.0}, T{1.0}, T{-1.0}, std::numeric_limits<T>::max(),
std::numeric_limits<T>::infinity(), -std::numeric_limits<T>::infinity(),
// std::nan is double. Cast to T explicitly.
static_cast<T>(std::nan(""))};
return ChooseOneOr(special, prng,
[&] { return absl::Uniform(prng, T{0}, T{1}); });
}
void Mutate(value_type& val, absl::BitGenRef prng, bool only_shrink) {
if (only_shrink) {
if (!std::isfinite(val) || val == 0) return;
val = ShrinkTowards(prng, val, T{0});
return;
}
const T prev = val;
do {
// If it is not finite we can't change it a bit because it would stay the
// same. eg inf/2 == inf.
if (!std::isfinite(val)) {
val = Init(prng);
} else {
RunOne(
prng, //
[&] { val = val / 2; }, //
[&] { val = -val; }, //
[&] { val = val + 1; }, //
[&] { val = val * 3; });
}
// Make sure Mutate really mutates.
} while (val == prev || (std::isnan(prev) && std::isnan(val)));
}
auto GetPrinter() const { return FloatingPrinter{}; }
};
// Common base for container domains. Provides common APIs.
template <typename Derived>
class ContainerOfImplBase : public DomainBase<Derived> {
using InnerDomainT = ExtractTemplateParameter<1, Derived>;
public:
using value_type = ExtractTemplateParameter<0, Derived>;
static constexpr bool has_custom_corpus_type =
// Specialized handling of vector<bool> since you can't actually hold
// a reference to a single bit but instead get a proxy value.
is_bitvector_v<value_type> ||
// If the container is associative we force a custom corpus type to allow
// modifying the keys.
is_associative_container_v<value_type> ||
InnerDomainT::has_custom_corpus_type;
// `corpus_type` might be immutable (eg std::pair<const int, int> for maps
// inner domain). We store them in a std::list to allow for this.
using corpus_type =
std::conditional_t<has_custom_corpus_type,
std::list<corpus_type_t<InnerDomainT>>, value_type>;
// Some container mutation only applies to vector or string types which do
// not have a custom corpus type.
static constexpr bool is_vector_or_string =
!has_custom_corpus_type &&
(is_vector_v<value_type> || std::is_same_v<value_type, std::string>);
static constexpr bool is_vector_of_uint8_or_char =
!has_custom_corpus_type && is_vector_v<value_type> &&
(std::is_same_v<ExtractTemplateParameter<0, value_type>, uint8_t> ||
std::is_same_v<ExtractTemplateParameter<0, value_type>, char>);
static constexpr bool is_directly_serializable =
!has_custom_corpus_type &&
(is_vector_of_uint8_or_char || std::is_same_v<value_type, std::string>);
// The current implementation of container dictionary only supports
// vector or string container value_type, whose InnerDomain is
// an `ArbitraryImpl<T2>` where T2 is an integral type.
static constexpr bool container_has_memory_dict =
is_memory_dictionary_compatible<InnerDomainT>::value &&
is_vector_or_string;
// If `!container_has_memory_dict`, dict_type is a bool and dict
// is not used. This conditional_t may be neccessary because some
// value_type may not have copy constructors(for example, proto).
// Making it a safe type(bool) to not break some targets.
using dict_type = std::conditional_t<container_has_memory_dict,
ContainerDictionary<value_type>, bool>;
using dict_entry_type = std::conditional_t<container_has_memory_dict,
DictionaryEntry<value_type>, bool>;
ContainerOfImplBase() = default;
explicit ContainerOfImplBase(InnerDomainT inner) : inner_(std::move(inner)) {}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
permanent_dict_candidate_ = std::nullopt;
FUZZTEST_INTERNAL_CHECK(
val.size() >= this->min_size_ && val.size() <= this->max_size_, "size ",
val.size(), " is not between [", this->min_size_, "; ", this->max_size_,
"]");
const bool can_shrink = val.size() > this->min_size_;
const bool can_grow = !only_shrink && val.size() < this->max_size_;
const bool can_change = val.size() != 0;
const bool can_use_memory_dict = !only_shrink &&
container_has_memory_dict && can_change &&
GetExecutionCoverage() != nullptr;
const int action_count =
can_shrink + can_grow + can_change + can_use_memory_dict;
if (action_count == 0) return;
int action = absl::Uniform(prng, 0, action_count);
if (can_shrink) {
if (action-- == 0) {
// If !has_custom_corpus_type, try shrink a consecutive chunk
// or shrink 1 with equal probability.
// Otherwise always shrink 1.
if constexpr (!has_custom_corpus_type) {
if (absl::Bernoulli(prng, 0.5)) {
EraseRandomChunk(val, prng, this->min_size_);
return;
}
}
val.erase(ChoosePosition(val, IncludeEnd::kNo, prng));
return;
}
}
if (can_grow) {
if (action-- == 0) {
// If !has_custom_corpus_type, try grow a consecutive chunk or
// grow 1 with equal probability. Otherwise
// always grow 1.
if constexpr (!has_custom_corpus_type) {
if (absl::Bernoulli(prng, 0.5)) {
auto element_val = inner_.Init(prng);
InsertRandomChunk(val, prng, this->max_size_, element_val);
return;
}
}
Self().GrowOne(val, prng);
return;
}
}
if (can_change) {
if (action-- == 0) {
// If !has_custom_corpus_type, try mutate a consecutive chunk or
// mutate 1 with equal probability. Otherwise
// always mutate 1.
if constexpr (!has_custom_corpus_type) {
if (absl::Bernoulli(prng, 0.5)) {
size_t change_offset = ChooseOffset(val.size(), prng);
size_t changes =
absl::Uniform(absl::IntervalClosedClosed, prng, size_t{1},
std::min(val.size() - change_offset, size_t{15}));
auto it_start = std::next(val.begin(), change_offset);
auto it_end = std::next(it_start, changes);
for (; it_start != it_end; it_start = std::next(it_start)) {
Self().MutateElement(val, prng, it_start, only_shrink);
}
return;
}
}
Self().MutateElement(
val, prng, ChoosePosition(val, IncludeEnd::kNo, prng), only_shrink);
return;
}
}
if constexpr (container_has_memory_dict) {
if (can_use_memory_dict) {
if (action-- == 0) {
bool mutated = MemoryDictionaryMutation(
val, prng, temporary_dict_, manual_dict_, permanent_dict_,
permanent_dict_candidate_, this->max_size_);
// If dict failed, fall back to changing an element.
if (!mutated) {
Self().MutateElement(val, prng,
ChoosePosition(val, IncludeEnd::kNo, prng),
only_shrink);
}
return;
}
}
}
}
void UpdateMemoryDictionary(const corpus_type& val) {
// TODO(JunyangShao): Implement dictionary propagation to container
// elements. For now the propagation stops in container domains.
// Because all elements share an `inner_` and will share
// a dictionary if we propagate it, which makes the dictionary
// not efficient.
if constexpr (container_has_memory_dict) {
if (GetExecutionCoverage() != nullptr) {
temporary_dict_.MatchEntriesFromTableOfRecentCompares(
val, GetExecutionCoverage()->GetTablesOfRecentCompares());
if (permanent_dict_candidate_.has_value() &&
permanent_dict_.Size() < kPermanentDictMaxSize) {
permanent_dict_.AddEntry(std::move(*permanent_dict_candidate_));
permanent_dict_candidate_ = std::nullopt;
}
}
}
}
// These are specific for containers only. They are not part of the common
// Domain API.
Derived& WithSize(size_t s) { return WithMinSize(s).WithMaxSize(s); }
Derived& WithMinSize(size_t s) {
min_size_ = s;
return static_cast<Derived&>(*this);
}
Derived& WithMaxSize(size_t s) {
max_size_ = s;
return static_cast<Derived&>(*this);
}
Derived& WithDictionary(absl::Span<const value_type> manual_dict) {
static_assert(container_has_memory_dict,
"Manual Dictionary now only supports std::vector or "
"std::string or std::string_view.\n");
for (const value_type& entry : manual_dict) {
FUZZTEST_INTERNAL_CHECK(
entry.size() <= this->max_size_,
"At least one dictionary entry is larger than max container size.");
manual_dict_.AddEntry({std::nullopt, entry});
}
return static_cast<Derived&>(*this);
}
auto GetPrinter() const {
if constexpr (std::is_same_v<value_type, std::string>) {
// std::string has special handling for better output
return StringPrinter{};
} else {
return ContainerPrinter<Derived, InnerDomainT>{inner_};
}
}
value_type GetValue(const corpus_type& value) const {
if constexpr (has_custom_corpus_type) {
value_type result;
for (const auto& v : value) {
result.insert(result.end(), inner_.GetValue(v));
}
return result;
} else {
return value;
}
}
std::optional<corpus_type> FromValue(const value_type& value) const {
if constexpr (!has_custom_corpus_type) {
return value;
} else {
corpus_type res;
for (const auto& elem : value) {
auto inner_value = inner_.FromValue(elem);
if (!inner_value) return std::nullopt;
res.push_back(*std::move(inner_value));
}
return res;
}
}
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
std::optional<corpus_type> res = ParseCorpusWithoutValidation(obj);
if (res.has_value()) {
value_type v = GetValue(*res);
if (v.size() < min_size_ || v.size() > max_size_) {
return std::nullopt;
}
}
return res;
}
IRObject SerializeCorpus(const corpus_type& v) const {
if constexpr (has_custom_corpus_type) {
IRObject obj;
auto& subs = obj.MutableSubs();
for (const auto& elem : v) {
subs.push_back(inner_.SerializeCorpus(elem));
}
return obj;
} else if constexpr (is_vector_of_uint8_or_char) {
return IRObject::FromCorpus(std::string{v.begin(), v.end()});
} else {
return IRObject::FromCorpus(v);
}
}
InnerDomainT Inner() const { return inner_; }
template <typename OtherDerived>
friend class ContainerOfImplBase;
template <typename OtherDerived>
void CopyConstraintsFrom(const ContainerOfImplBase<OtherDerived>& other) {
min_size_ = other.min_size_;
max_size_ = other.max_size_;
}
protected:
InnerDomainT inner_;
int ChooseRandomSize(absl::BitGenRef prng) {
// The container size should not be empty (unless max_size_ = 0) because the
// initialization should be random if possible.
// TODO(changochen): Increase the number of generated elements.
// Currently we make container generate zero or one element to avoid
// infinite recursion in recursive data structures. For the example, we want
// to build a domain for `struct X{ int leaf; vector<X> recursive`, the
// expected generated length is `E(X) = E(leaf) + E(recursive)`. If the
// container generate `0-10` elements when calling `Init`, then
// `E(recursive) = 4.5 E(X)`, which will make `E(X) = Infinite`.
// Make some smallish random seed containers.
return absl::Uniform(prng, min_size_,
std::min(max_size_ + 1, min_size_ + 2));
}
size_t min_size_ = 0;
size_t max_size_ = 1000;
private:
// Use the generic serializer when no custom corpus type is used, since it is
// more efficient. Eg a string value can be serialized as a string instead of
// as a sequence of char values.
std::optional<corpus_type> ParseCorpusWithoutValidation(
const IRObject& obj) const {
if constexpr (has_custom_corpus_type) {
auto subs = obj.Subs();
if (!subs) return std::nullopt;
corpus_type res;
for (const auto& elem : *subs) {
if (auto parsed_elem = inner_.ParseCorpus(elem)) {
res.insert(res.end(), std::move(*parsed_elem));
} else {
return std::nullopt;
}
}
return res;
} else if constexpr (is_vector_of_uint8_or_char) {
std::optional<std::string> str = obj.ToCorpus<std::string>();
if (!str.has_value()) return std::nullopt;
return corpus_type{str->begin(), str->end()};
} else {
return obj.ToCorpus<corpus_type>();
}
}
Derived& Self() { return static_cast<Derived&>(*this); }
// Temporary memory dictionary. Collected from tracing the program
// execution. It will always be empty if no execution_coverage_ is found,
// for example when running with other fuzzer engines.
dict_type temporary_dict_ = {};
// Dictionary provided by the user. It has the same type requirements as
// memory dictionaries, but it could be made more general.
// TODO(JunyangShao): make it more general.
dict_type manual_dict_ = {};
// Permanent memory dictionary. Contains entries upgraded from
// temporary_dict_. Upgrade happens when a temporary_dict_ entry
// leads to new coverage.
dict_type permanent_dict_ = {};
static constexpr size_t kPermanentDictMaxSize = 512;
// Keep tracks of what temporary_dict_ entry was used in the last dictionary
// mutation. Will get upgraded into permanent_dict_ if it leads to new
// coverages.
std::optional<dict_entry_type> permanent_dict_candidate_ = std::nullopt;
};
// Base class for associative container domains, such as Domain<std::set> and
// Domain<absl::flat_hash_map>; these container have a key_type, used for
// element access by key.
template <typename T, typename InnerDomain>
class AssociativeContainerOfImpl
: public ContainerOfImplBase<AssociativeContainerOfImpl<T, InnerDomain>> {
using Base = typename AssociativeContainerOfImpl::ContainerOfImplBase;
public:
using value_type = T;
using corpus_type = typename Base::corpus_type;
static constexpr bool has_custom_corpus_type = Base::has_custom_corpus_type;
static_assert(has_custom_corpus_type, "Must be custom to mutate keys");
AssociativeContainerOfImpl() = default;
explicit AssociativeContainerOfImpl(InnerDomain inner)
: Base(std::move(inner)) {}
corpus_type Init(absl::BitGenRef prng) {
const int size = this->ChooseRandomSize(prng);
corpus_type val;
Grow(val, prng, size, 10000);
if (val.size() < this->min_size_) {
// We tried to make a container with the minimum specified size and we
// could not after a lot of attempts. This could be caused by an
// unsatisfiable domain, such as one where the minimum desired size is
// greater than the number of unique `value_type` values that exist; for
// example, a uint8_t has only 256 possible values, so we can't create
// a std::set<uint8_t> whose size is greater than 256, as requested here:
//
// SetOf(Arbitrary<uint8_t>()).WithMinSize(300)
//
// Abort the test and inform the user.
AbortInTest(absl::StrFormat(R"(
[!] Ineffective use of WithSize()/WithMinSize() detected!
The domain failed trying to find enough values that satisfy the constraints.
The minimum size requested is %u and we could only find %u elements.
Please verify that the inner domain can provide enough values.
)",
this->min_size_, val.size()));
}
return val;
}
private:
friend Base;
void GrowOne(corpus_type& val, absl::BitGenRef prng) {
constexpr size_t kFailuresAllowed = 100;
Grow(val, prng, 1, kFailuresAllowed);
}
// Try to grow `val` by `n` elements.
void Grow(corpus_type& val, absl::BitGenRef prng, size_t n,
size_t failures_allowed) {
// Try a few times to insert a new element (correctly assuming the
// initialization yields a random element if possible). We might get
// duplicates. But don't try forever because we might be at the limit of the
// container. Eg a set<char> with 256 elements can't grow anymore.
//
// Use the real value to make sure we are not adding invalid elements to the
// list. The insertion in `real_value` will do the deduping for us.
auto real_value = this->GetValue(val);
const size_t final_size = real_value.size() + n;
while (real_value.size() < final_size) {
auto new_element = this->inner_.Init(prng);
if (real_value.insert(this->inner_.GetValue(new_element)).second) {
val.push_back(std::move(new_element));
} else {
// Just stop if we reached the allowed failures.
// Let the caller decide what to do.
if (failures_allowed-- == 0) return;
}
}
}
// Try to mutate the element in `it`.
void MutateElement(corpus_type& val, absl::BitGenRef prng,
typename corpus_type::iterator it, bool only_shrink) {
size_t failures_allowed = 100;
// Try a few times to mutate the element.
// If the mutation reduces the number of elements in the container it means
// we made the key collide with an existing element. Don't try forever as
// there might not be any other value that we can mutate to.
// Eg a set<char> with 256 elements can't mutate any of its elements.
//
// Use the real value to make sure we are not adding invalid elements to the
// list. The insertion in `real_value` will do the deduping for us.
corpus_type original_element_list;
original_element_list.splice(original_element_list.end(), val, it);
auto real_value = this->GetValue(val);
while (failures_allowed > 0) {
auto new_element = original_element_list.front();
this->inner_.Mutate(new_element, prng, only_shrink);
if (real_value.insert(this->inner_.GetValue(new_element)).second) {
val.push_back(std::move(new_element));
return;
} else {
--failures_allowed;
}
}
// Give up and put the element back.
val.splice(val.end(), original_element_list);
}
};
template <typename T, typename InnerDomain>
class SequenceContainerOfImpl
: public ContainerOfImplBase<SequenceContainerOfImpl<T, InnerDomain>> {
using Base = typename SequenceContainerOfImpl::ContainerOfImplBase;
public:
using value_type = T;
using corpus_type = typename Base::corpus_type;
SequenceContainerOfImpl() = default;
explicit SequenceContainerOfImpl(InnerDomain inner)
: Base(std::move(inner)) {}
corpus_type Init(absl::BitGenRef prng) {
const int size = this->ChooseRandomSize(prng);
corpus_type val;
while (val.size() < size) {
val.insert(val.end(), this->inner_.Init(prng));
}
return val;
}
uint64_t CountNumberOfFields(const corpus_type& val) {
uint64_t total_weight = 0;
for (auto& i : val) {
total_weight += this->inner_.CountNumberOfFields(i);
}
return total_weight;
}
uint64_t MutateSelectedField(corpus_type& val, absl::BitGenRef prng,
bool only_shrink,
uint64_t selected_field_index) {
uint64_t field_counter = 0;
for (auto& i : val) {
field_counter += this->inner_.MutateSelectedField(
i, prng, only_shrink, selected_field_index - field_counter);
if (field_counter >= selected_field_index) break;
}
return field_counter;
}
private:
friend Base;
void GrowOne(corpus_type& val, absl::BitGenRef prng) {
val.insert(ChoosePosition(val, IncludeEnd::kYes, prng),
this->inner_.Init(prng));
}
void MutateElement(corpus_type&, absl::BitGenRef prng,
typename corpus_type::iterator it, bool only_shrink) {
this->inner_.Mutate(*it, prng, only_shrink);
}
};
template <typename T, typename InnerDomain>
using ContainerOfImpl =
std::conditional_t<is_associative_container_v<T>,
AssociativeContainerOfImpl<T, InnerDomain>,
SequenceContainerOfImpl<T, InnerDomain>>;
template <typename T>
class ArbitraryImpl<
T, std::enable_if_t<always_true<T>,
decltype(
// Iterable
T().begin(), T().end(), T().size(),
// Values are mutable
// This rejects associative containers, for example
// *T().begin() = std::declval<typename
// T::value_type>(), Can insert and erase elements
T().insert(T().end(),
std::declval<typename T::value_type>()),
T().erase(T().begin()),
//
(void)0)>>
: public ContainerOfImpl<T, ArbitraryImpl<typename T::value_type>> {};
template <typename T>
class InRangeImpl : public DomainBase<InRangeImpl<T>> {
public:
using value_type = T;
constexpr static bool T_is_integer = std::numeric_limits<T>::is_integer;
constexpr static bool T_is_signed = std::is_signed<T>::value;
constexpr static bool T_is_memory_dictionary_compatible =
std::is_integral_v<T> &&
(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8);
using IntegerDictionaryT =
std::conditional_t<T_is_memory_dictionary_compatible,
IntegerDictionary<T>, bool>;
explicit InRangeImpl(T min, T max) : min_(min), max_(max) {
FUZZTEST_INTERNAL_CHECK_PRECONDITION(min < max,
"min must be smaller than max!");
if constexpr (!T_is_integer) {
FUZZTEST_INTERNAL_CHECK_PRECONDITION(
!(min == std::numeric_limits<T>::lowest() &&
max == std::numeric_limits<T>::max()),
"Consider using the Finite<T>() domain instead.");
FUZZTEST_INTERNAL_CHECK_PRECONDITION(std::isfinite(max - min),
"Range is too large!");
}
if constexpr (T_is_integer) {
// Find the longest common prefix
// (from the most significant bit to the least significant bit) of
// min_ and max_, and we only mutate the bits after the prefix.
// This way it can somehow restrict the bit flipping range, but it
// may still fail for range like [0b10000000, 0b01111111] which has
// no valid bit flipping mutations.
// We need to split the signed type range to positve range and
// negative range because of their two's complement representation.
if constexpr (T_is_signed) {
if (min_ < 0 && max_ >= 0) {
largest_mutable_bit_negative = BitWidth(~min);
largest_mutable_bit_positive = BitWidth(max);
} else if (min_ >= 0) {
largest_mutable_bit_positive = BitWidth(min ^ max);
} else if (max_ < 0) {
largest_mutable_bit_negative = BitWidth(min ^ max);
}
} else {
largest_mutable_bit_positive = BitWidth(min ^ max);
}
}
}
value_type Init(absl::BitGenRef prng) {
// TODO(sbenzaquen): Add more interesting points in the range.
const T special[] = {min_, max_};
return ChooseOneOr(special, prng, [&] {
return absl::Uniform(absl::IntervalClosedClosed, prng, min_, max_);
});
}
void Mutate(value_type& val, absl::BitGenRef prng, bool only_shrink) {
permanent_dict_candidate_ = std::nullopt;
if (val < min_ || val > max_) {
val = Init(prng);
return;
}
if (only_shrink) {
// Shrink towards zero, limiting to the range.
T limit;
if (max_ <= T{0}) {
limit = max_;
} else if (min_ >= T{0}) {
limit = min_;
} else {
limit = T{0};
}
if (val == limit) return;
val = ShrinkTowards(prng, val, limit);
return;
}
const T prev = val;
do {
// Randomly apply 3 types of mutations, similarly to Arbitrary.
if constexpr (T_is_integer) {
// Random bit flip.
if constexpr (T_is_signed) {
if (absl::Bernoulli(prng, 0.25)) {
if (prev >= 0) {
RandomBitFlip(prng, val, largest_mutable_bit_positive);
} else {
RandomBitFlip(prng, val, largest_mutable_bit_negative);
}
if (max_ < val || val < min_) {
val = absl::Uniform(absl::IntervalClosedClosed, prng, min_, max_);
}
} else {
RandomWalkOrUniformOrDict<5>(prng, val, min_, max_, temporary_dict_,
permanent_dict_,
permanent_dict_candidate_);
}
} else {
if (absl::Bernoulli(prng, 0.25)) {
RandomBitFlip(prng, val, largest_mutable_bit_positive);
if (max_ < val || val < min_) {
val = absl::Uniform(absl::IntervalClosedClosed, prng, min_, max_);
}
} else {
RandomWalkOrUniformOrDict<5>(prng, val, min_, max_, temporary_dict_,
permanent_dict_,
permanent_dict_candidate_);
}
}
} else {
RandomWalkOrUniformOrDict<5>(prng, val, min_, max_, temporary_dict_,
permanent_dict_,
permanent_dict_candidate_);
}
} while (val == prev); // Make sure Mutate really mutates.
}
std::optional<value_type> ParseCorpus(const IRObject& obj) const {
auto as_corpus = obj.ToCorpus<value_type>();
if (!as_corpus || *as_corpus > max_ || *as_corpus < min_) {
return std::nullopt;
}
return as_corpus;
}
auto GetPrinter() const {
if constexpr (std::numeric_limits<T>::is_integer) {
return IntegralPrinter{};
} else {
return FloatingPrinter{};
}
}
void UpdateMemoryDictionary(const value_type& val) {
if constexpr (T_is_memory_dictionary_compatible) {
if (GetExecutionCoverage() != nullptr) {
temporary_dict_.MatchEntriesFromTableOfRecentCompares(
val, GetExecutionCoverage()->GetTablesOfRecentCompares(), min_,
max_);
if (permanent_dict_candidate_.has_value() &&
permanent_dict_.Size() < kPermanentDictMaxSize) {
permanent_dict_.AddEntry(std::move(*permanent_dict_candidate_));
permanent_dict_candidate_ = std::nullopt;
}
}
}
}
private:
T min_;
T max_;
size_t largest_mutable_bit_positive = 0;
size_t largest_mutable_bit_negative = 0;
IntegerDictionaryT temporary_dict_ = {};
IntegerDictionaryT permanent_dict_ = {};
std::optional<T> permanent_dict_candidate_ = std::nullopt;
static constexpr size_t kPermanentDictMaxSize = 512;
};
template <typename T>
class ElementOfImpl : public DomainBase<ElementOfImpl<T>> {
public:
using value_type = T;
enum class corpus_type : size_t;
static constexpr bool has_custom_corpus_type = true;
explicit ElementOfImpl(std::vector<T> values) : values_(values) {
FUZZTEST_INTERNAL_CHECK_PRECONDITION(
!values.empty(), "ElementOf requires a non empty list.");
}
corpus_type Init(absl::BitGenRef prng) {
return corpus_type{absl::Uniform<size_t>(prng, 0, values_.size())};
}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
if (values_.size() <= 1) return;
if (only_shrink) {
size_t index = static_cast<size_t>(val);
if (index == 0) return;
index = absl::Uniform<size_t>(prng, 0, index);
val = static_cast<corpus_type>(index);
return;
}
// Choose a different index.
size_t offset = absl::Uniform<size_t>(prng, 1, values_.size());
size_t index = static_cast<size_t>(val);
index += offset;
if (index >= values_.size()) index -= values_.size();
val = static_cast<corpus_type>(index);
}
value_type GetValue(corpus_type value) const {
return values_[static_cast<size_t>(value)];
}
std::optional<corpus_type> FromValue(const value_type& v) const {
// For simple scalar types we try to find them in the list.
// Otherwise, we fail unconditionally because we might not be able to
// effectively compare the values.
// Checking for `operator==` is not enough. You will have false positives
// where `operator==` exists but it either doens't compile or it gives the
// wrong answer.
if constexpr (std::is_enum_v<value_type> ||
std::is_arithmetic_v<value_type> ||
std::is_same_v<std::string, value_type> ||
std::is_same_v<std::string_view, value_type>) {
auto it = std::find(values_.begin(), values_.end(), v);
return it == values_.end() ? std::nullopt
: std::optional(static_cast<corpus_type>(
it - values_.begin()));
}
return std::nullopt;
}
auto GetPrinter() const { return AutodetectTypePrinter<T>(); }
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
auto as_corpus = obj.ToCorpus<corpus_type>();
if (!as_corpus || static_cast<size_t>(*as_corpus) >= values_.size())
return std::nullopt;
return as_corpus;
}
IRObject SerializeCorpus(const corpus_type& v) const {
return IRObject::FromCorpus(v);
}
private:
std::vector<T> values_;
};
template <typename... Inner>
class OneOfImpl
: public DomainBase<OneOfImpl<Inner...>,
typename std::tuple_element_t<
0, typename std::tuple<Inner...>>::value_type> {
public:
// All value_types of inner domains must be the same. (Though note that they
// can have different corpus_types!)
using value_type =
typename std::tuple_element_t<0,
typename std::tuple<Inner...>>::value_type;
static_assert(std::conjunction_v<
std::is_same<value_type, typename Inner::value_type>...>,
"All domains in a OneOf must have the same value_type.");
static constexpr bool has_custom_corpus_type = true;
using corpus_type = std::variant<corpus_type_t<Inner>...>;
explicit OneOfImpl(Inner... domains) : domains_(std::move(domains)...) {}
corpus_type Init(absl::BitGenRef prng) {
// TODO(b/191368509): Consider the cardinality of the subdomains to weight
// them.
return Switch<sizeof...(Inner)>(
absl::Uniform(prng, size_t{}, num_domains_), [&](auto I) {
return corpus_type(std::in_place_index<I>,
std::get<I>(domains_).Init(prng));
});
}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
// Switch to another domain 1% of the time when not reducing.
if (num_domains_ > 1 && !only_shrink && absl::Bernoulli(prng, 0.01)) {
// Choose a different index.
size_t offset = absl::Uniform<size_t>(prng, 1, num_domains_);
size_t index = static_cast<size_t>(val.index());
index += offset;
if (index >= num_domains_) index -= num_domains_;
Switch<sizeof...(Inner)>(index, [&](auto I) {
auto& domain = std::get<I>(domains_);
val.template emplace<I>(domain.Init(prng));
});
} else {
Switch<sizeof...(Inner)>(val.index(), [&](auto I) {
auto& domain = std::get<I>(domains_);
domain.Mutate(std::get<I>(val), prng, only_shrink);
});
}
}
value_type GetValue(const corpus_type& v) const {
return Switch<sizeof...(Inner)>(v.index(), [&](auto I) -> value_type {
auto domain = std::get<I>(domains_);
return domain.GetValue(std::get<I>(v));
});
}
std::optional<corpus_type> FromValue(const value_type& v) const {
std::optional<corpus_type> res;
const auto try_one_corpus = [&](auto I) {
if (auto inner_res = std::get<I>(domains_).FromValue(v)) {
res.emplace(std::in_place_index<I>, *std::move(inner_res));
return true;
}
return false;
};
ApplyIndex<sizeof...(Inner)>([&](auto... I) {
// Try them in order, break on first success.
(try_one_corpus(I) || ...);
});
return res;
}
auto GetPrinter() const { return OneOfPrinter<Inner...>{domains_}; }
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
return ParseWithDomainVariant(domains_, obj);
}
IRObject SerializeCorpus(const corpus_type& v) const {
return SerializeWithDomainVariant(domains_, v);
}
private:
std::tuple<Inner...> domains_;
static_assert(std::tuple_size_v<decltype(domains_)> > 0,
"OneOf requires a non-empty list.");
// For ease of reading.
const size_t num_domains_ = sizeof...(Inner);
};
template <typename T>
class BitFlagCombinationOfImpl
: public DomainBase<BitFlagCombinationOfImpl<T>> {
public:
using value_type = T;
explicit BitFlagCombinationOfImpl(absl::Span<const T> flags)
: flags_(flags.begin(), flags.end()) {
FUZZTEST_INTERNAL_CHECK_PRECONDITION(
!flags.empty(), "BitFlagCombinationOf requires a non empty list.");
// Make sure they are mutually exclusive options, and none are empty.
for (int i = 0; i < flags.size(); ++i) {
T v1 = flags[i];
FUZZTEST_INTERNAL_CHECK_PRECONDITION(
v1 != T{}, "BitFlagCombinationOf requires non zero flags.");
for (int j = i + 1; j < flags.size(); ++j) {
T v2 = flags[j];
FUZZTEST_INTERNAL_CHECK_PRECONDITION(
BitAnd(v1, v2) == T{},
"BitFlagCombinationOf requires flags to be mutually exclusive.");
}
}
}
value_type Init(absl::BitGenRef) { return value_type{}; }
void Mutate(value_type& val, absl::BitGenRef prng, bool only_shrink) {
T to_switch = flags_[ChooseOffset(flags_.size(), prng)];
if (!only_shrink || BitAnd(val, to_switch) != T{}) {
val = BitXor(val, to_switch);
}
}
auto GetPrinter() const { return AutodetectTypePrinter<T>(); }
private:
template <typename U>
static value_type BitAnd(U a, U b) {
if constexpr (std::is_enum_v<U>) {
return BitAnd(static_cast<std::underlying_type_t<U>>(a),
static_cast<std::underlying_type_t<U>>(b));
} else {
return static_cast<value_type>(a & b);
}
}
template <typename U>
static value_type BitXor(U a, U b) {
if constexpr (std::is_enum_v<U>) {
return BitXor(static_cast<std::underlying_type_t<U>>(a),
static_cast<std::underlying_type_t<U>>(b));
} else {
return static_cast<value_type>(a ^ b);
}
}
std::vector<value_type> flags_;
};
class InRegexpImpl : public DomainBase<InRegexpImpl, std::string> {
public:
using DFAPath = std::vector<RegexpDFA::Edge>;
using value_type = std::string;
using corpus_type = DFAPath;
static constexpr bool has_custom_corpus_type = true;
explicit InRegexpImpl(std::string_view regex_str)
: dfa_(RegexpDFA::Create(regex_str)) {}
DFAPath Init(absl::BitGenRef prng) {
std::optional<DFAPath> path =
dfa_.StringToDFAPath(dfa_.GenerateString(prng));
FUZZTEST_INTERNAL_CHECK_PRECONDITION(path.has_value(),
"Init should generate valid paths");
return *path;
}
// Strategy: Parse the input string into a path in the DFA. Pick a node in the
// path and random walk from the node until we reach an end state or go back
// to the original path.
void Mutate(DFAPath& path, absl::BitGenRef prng, bool only_shrink) {
if (only_shrink) {
// Fast path to remove loop.
if (absl::Bernoulli(prng, 0.5)) {
if (ShrinkByRemoveLoop(prng, path)) return;
}
if (ShrinkByFindShorterSubPath(prng, path)) return;
return;
}
int rand_offset = absl::Uniform<int>(prng, 0u, path.size());
// Maps states to the path index of their first appearance. We want the
// mutation to be mininal, so if a state appears multiple times in the path,
// we only keep the index of its first appearance.
std::vector<std::optional<int>> sink_states_first_appearance(
dfa_.state_count());
for (int i = rand_offset; i < path.size(); ++i) {
int state_id = path[i].from_state_id;
if (sink_states_first_appearance[state_id].has_value()) continue;
sink_states_first_appearance[state_id] = i;
}
std::vector<RegexpDFA::Edge> new_subpath = dfa_.FindPath(
prng, path[rand_offset].from_state_id, sink_states_first_appearance);
int to_state_id = new_subpath.back().from_state_id;
new_subpath.pop_back();
DFAPath new_path;
for (size_t i = 0; i < rand_offset; ++i) {
new_path.push_back(path[i]);
}
for (size_t i = 0; i < new_subpath.size(); ++i) {
new_path.push_back(new_subpath[i]);
}
// Found a node in the original path, so we append the remaining substring
// of the original path.
if (sink_states_first_appearance[to_state_id].has_value()) {
for (size_t i = *sink_states_first_appearance[to_state_id];
i < path.size(); ++i) {
new_path.push_back(path[i]);
}
}
FUZZTEST_INTERNAL_CHECK(
dfa_.StringToDFAPath(*dfa_.DFAPathToString(new_path)).has_value(),
"Mutation generate invalid strings");
path = std::move(new_path);
}
auto GetPrinter() const { return StringPrinter{}; }
value_type GetValue(const corpus_type& v) const {
std::optional<std::string> val = dfa_.DFAPathToString(v);
FUZZTEST_INTERNAL_CHECK(val.has_value(), "Corpus is invalid!");
return *val;
}
std::optional<corpus_type> FromValue(const value_type& v) const {
return dfa_.StringToDFAPath(v);
}
IRObject SerializeCorpus(const corpus_type& path) const {
IRObject obj;
auto& subs = obj.MutableSubs();
for (const auto& edge : path) {
subs.push_back(IRObject::FromCorpus(edge.from_state_id));
subs.push_back(IRObject::FromCorpus(edge.edge_index));
}
return obj;
}
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
auto subs = obj.Subs();
if (!subs) return std::nullopt;
if (subs->size() % 2 != 0) return std::nullopt;
corpus_type res;
for (size_t i = 0; i < subs->size(); i += 2) {
auto from_state_id = (*subs)[i].ToCorpus<int>();
auto edge_index = (*subs)[i + 1].ToCorpus<int>();
if (!from_state_id.has_value() || !edge_index.has_value())
return std::nullopt;
res.push_back(RegexpDFA::Edge{*from_state_id, *edge_index});
}
// Check whether this is a valid path in the DFA.
if (!dfa_.DFAPathToString(res).has_value()) return std::nullopt;
return res;
}
private:
// Remove a random loop in the DFA path and return the string from the
// modified path. A loop is a subpath that starts and ends with the same
// state.
bool ShrinkByRemoveLoop(absl::BitGenRef prng, DFAPath& path) {
std::vector<std::vector<int>> state_appearances(dfa_.state_count());
for (int i = 0; i < path.size(); ++i) {
state_appearances[path[i].from_state_id].push_back(i);
}
std::vector<int> states_with_loop;
for (int i = 0; i < dfa_.state_count(); ++i) {
if (state_appearances[i].size() > 1) states_with_loop.push_back(i);
}
if (!states_with_loop.empty()) {
int rand_state_id = states_with_loop[absl::Uniform<int>(
prng, 0, states_with_loop.size())];
std::vector<int>& loop_indexes = state_appearances[rand_state_id];
int loop_start = absl::Uniform<int>(prng, 0u, loop_indexes.size() - 1);
int loop_end =
absl::Uniform<int>(prng, loop_start + 1, loop_indexes.size());
// Delete the detected loop.
path.erase(path.begin() + loop_indexes[loop_start],
path.begin() + loop_indexes[loop_end]);
FUZZTEST_INTERNAL_CHECK(
dfa_.StringToDFAPath(*dfa_.DFAPathToString(path)).has_value(),
"The mutated path is invalid!");
return true;
}
return false;
}
// Randomly pick a subpath and try to replace it with a shorter one. As this
// might fail we keep trying until success or the maximum number of trials is
// reached.
bool ShrinkByFindShorterSubPath(absl::BitGenRef prng, DFAPath& path) {
if (path.size() <= 1) {
return false;
}
constexpr int n_trial = 40;
constexpr int max_exploration_length = 100;
for (int i = 0; i < n_trial; ++i) {
// Pick any state in `path` as the start of the subpath, *except* the one
// in the last element.
int from_index = absl::Uniform<int>(prng, 0u, path.size() - 1);
int from_state_id = path[from_index].from_state_id;
// Pick a state after the "from state" as the end of the subpath.
int to_index, to_state_id, length;
if (i <= n_trial / 2) {
// Pick any state in `path` after the "from state" as the end of the
// subpath; this excludes the "end state".
to_index =
absl::Uniform<int>(prng, from_index + 1,
std::min(from_index + max_exploration_length,
static_cast<int>(path.size())));
to_state_id = path[to_index].from_state_id;
length = to_index - from_index;
} else {
// If failing too many times, try to find a shorter path to the
// end_state as a fall back. In this case, to_index isn't the index of
// a valid element in `path`.
to_index = static_cast<int>(path.size());
to_state_id = dfa_.end_state_id();
length = to_index - from_index;
}
if (length == 1) continue;
std::vector<RegexpDFA::Edge> new_subpath = dfa_.FindPathWithinLengthDFS(
prng, from_state_id, to_state_id, length);
// If the size is unchanged, keep trying.
if (new_subpath.size() == length) continue;
DFAPath new_path(path.begin(), path.begin() + from_index);
new_path.insert(new_path.end(), new_subpath.begin(), new_subpath.end());
for (size_t idx = to_index; idx < path.size(); ++idx) {
new_path.push_back(path[idx]);
}
FUZZTEST_INTERNAL_CHECK(
dfa_.StringToDFAPath(*dfa_.DFAPathToString(new_path)).has_value(),
"The mutated path is invalid!");
path = std::move(new_path);
return true;
}
return false;
}
RegexpDFA dfa_;
};
enum class RequireCustomCorpusType { kNo, kYes };
// Helper to determine whether the first domain is directly serializable.
template <typename... Domains>
inline constexpr bool first_is_directly_serializable = false;
template <typename Domain, typename... Domains>
inline constexpr bool first_is_directly_serializable<Domain, Domains...> =
Domain::is_directly_serializable;
template <typename T, RequireCustomCorpusType require_custom, typename... Inner>
class AggregateOfImpl
: public DomainBase<AggregateOfImpl<T, require_custom, Inner...>, T> {
public:
using value_type = T;
// For user defined types (structs) we require a custom corpus_type
// (std::tuple), because the serializer does not support structs, only tuples.
static constexpr bool has_custom_corpus_type =
require_custom == RequireCustomCorpusType::kYes ||
(Inner::has_custom_corpus_type || ...);
using corpus_type =
std::conditional_t<has_custom_corpus_type,
std::tuple<corpus_type_t<Inner>...>, T>;
static constexpr bool is_directly_serializable =
require_custom == RequireCustomCorpusType::kNo && sizeof...(Inner) == 1 &&
first_is_directly_serializable<Inner...>;
AggregateOfImpl() = default;
explicit AggregateOfImpl(std::in_place_t, Inner... inner)
: inner_(std::move(inner)...) {}
corpus_type Init(absl::BitGenRef prng) {
return std::apply(
[&](auto&... inner) { return corpus_type{inner.Init(prng)...}; },
inner_);
}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
std::integral_constant<int, sizeof...(Inner)> size;
auto bound = internal::BindAggregate(val, size);
// Filter the tuple to only the mutable fields.
// The const ones can't be mutated.
// Eg in `std::pair<const int, int>` for maps.
static constexpr auto to_mutate =
GetMutableSubtuple<decltype(internal::BindAggregate(std::declval<T&>(),
size))>();
static constexpr size_t actual_size =
std::tuple_size_v<decltype(to_mutate)>;
if constexpr (actual_size > 0) {
int offset = absl::Uniform<int>(prng, 0, actual_size);
Switch<actual_size>(offset, [&](auto I) {
std::get<to_mutate[I]>(inner_).Mutate(std::get<to_mutate[I]>(bound),
prng, only_shrink);
});
}
}
void UpdateMemoryDictionary(const corpus_type& val) {
// Copy codes from Mutate that does the mutable domain filtering things.
std::integral_constant<int, sizeof...(Inner)> size;
auto bound = internal::BindAggregate(val, size);
static constexpr auto to_mutate =
GetMutableSubtuple<decltype(internal::BindAggregate(std::declval<T&>(),
size))>();
static constexpr size_t actual_size =
std::tuple_size_v<decltype(to_mutate)>;
// Apply UpdateMemoryDictionary to every mutable domain.
if constexpr (actual_size > 0) {
ApplyIndex<actual_size>([&](auto... I) {
(std::get<to_mutate[I]>(inner_).UpdateMemoryDictionary(
std::get<to_mutate[I]>(bound)),
...);
});
}
}
int UntypedPrintCorpusValue(const GenericDomainCorpusType& val,
absl::FormatRawSink out, internal::PrintMode mode,
std::optional<int> tuple_elem) const final {
if (tuple_elem.has_value()) {
if constexpr (sizeof...(Inner) != 0) {
if (*tuple_elem >= 0 && *tuple_elem < sizeof...(Inner)) {
Switch<sizeof...(Inner)>(*tuple_elem, [&](auto I) {
PrintValue(std::get<I>(inner_),
std::get<I>(val.GetAs<corpus_type_t<AggregateOfImpl>>()),
out, mode);
});
}
}
} else {
AggregateOfImpl::DomainBase::UntypedPrintCorpusValue(val, out, mode,
std::nullopt);
}
return sizeof...(Inner);
}
auto GetPrinter() const { return AggregatePrinter<Inner...>{inner_}; }
value_type GetValue(const corpus_type& value) const {
if constexpr (has_custom_corpus_type) {
if constexpr (DetectBindableFieldCount<value_type>() ==
DetectBraceInitCount<value_type>()) {
return ApplyIndex<sizeof...(Inner)>([&](auto... I) {
return T{std::get<I>(inner_).GetValue(std::get<I>(value))...};
});
} else {
// Right now the only other possibility is that the bindable field count
// is one less than the brace init field count. In that case, that extra
// field is used to initialize an empty base class. We'll need to update
// this if that ever changes.
return ApplyIndex<sizeof...(Inner)>([&](auto... I) {
return T{{}, std::get<I>(inner_).GetValue(std::get<I>(value))...};
});
}
} else {
return value;
}
}
std::optional<corpus_type> FromValue(const value_type& value) const {
if constexpr (has_custom_corpus_type) {
return ApplyIndex<sizeof...(Inner)>([&](auto... I) {
auto bound = internal::BindAggregate(
value, std::integral_constant<int, sizeof...(Inner)>{});
return [](auto... optional_values) -> std::optional<corpus_type> {
if ((optional_values.has_value() && ...)) {
return std::tuple(*std::move(optional_values)...);
} else {
return std::nullopt;
}
}(std::get<I>(inner_).FromValue(std::get<I>(bound))...);
});
} else {
return value;
}
}
// Use the generic serializer when no custom corpus type is used, since it is
// more efficient. Eg a string value can be serialized as a string instead of
// as a sequence of char values.
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
if constexpr (has_custom_corpus_type) {
return ParseWithDomainTuple(inner_, obj);
} else {
return obj.ToCorpus<corpus_type>();
}
}
IRObject SerializeCorpus(const corpus_type& v) const {
if constexpr (has_custom_corpus_type) {
return SerializeWithDomainTuple(inner_, v);
} else {
return IRObject::FromCorpus(v);
}
}
private:
template <typename Tuple>
static constexpr auto GetMutableSubtuple() {
return ApplyIndex<std::tuple_size_v<Tuple>>([](auto... I) {
constexpr auto is_const = [](auto I2) {
return std::is_const_v<
std::remove_reference_t<std::tuple_element_t<I2, Tuple>>>;
};
std::array<int, (!is_const(I) + ... + 0)> res{};
int pos = 0;
((is_const(I) ? I : res[pos++] = I), ...);
return res;
});
}
std::tuple<Inner...> inner_;
};
template <typename T, typename... Elem>
AggregateOfImpl<T, RequireCustomCorpusType::kYes, ArbitraryImpl<Elem>...>
DetectAggregateOfImpl2(std::tuple<Elem&...>);
// Detect the number and types of the fields.
// TODO(sbenzaquen): Verify the compiler error in case we can't detect it and
// improve if possible.
template <typename T, int N = *DetectBindableFieldCount<T>()>
decltype(DetectAggregateOfImpl2<T>(
BindAggregate(std::declval<T&>(), std::integral_constant<int, N>{})))
DetectAggregateOfImpl();
// Specialization for user-defined aggregate types.
template <typename T>
class ArbitraryImpl<
T, std::enable_if_t<std::is_class_v<T> && std::is_aggregate_v<T> &&
// Monostates have their own domain.
!is_monostate_v<T> &&
// std::array uses the Tuple domain.
!is_array_v<T>>>
: public decltype(DetectAggregateOfImpl<T>()) {};
template <typename T, typename U>
class ArbitraryImpl<std::pair<T, U>>
: public AggregateOfImpl<
std::pair<std::remove_const_t<T>, std::remove_const_t<U>>,
std::is_const_v<T> || std::is_const_v<U>
? RequireCustomCorpusType::kYes
: RequireCustomCorpusType::kNo,
ArbitraryImpl<T>, ArbitraryImpl<U>> {};
template <typename... T>
class ArbitraryImpl<std::tuple<T...>, std::enable_if_t<sizeof...(T) != 0>>
: public AggregateOfImpl<std::tuple<T...>, RequireCustomCorpusType::kNo,
ArbitraryImpl<T>...> {};
template <typename T, size_t N>
auto AggregateOfImplForArray() {
return ApplyIndex<N>([&](auto... I) {
return AggregateOfImpl<std::array<T, N>, RequireCustomCorpusType::kNo,
std::enable_if_t<(I >= 0), ArbitraryImpl<T>>...>{};
});
}
template <typename T, size_t N>
class ArbitraryImpl<std::array<T, N>>
: public decltype(AggregateOfImplForArray<T, N>()) {};
template <typename T, typename... Inner>
class VariantOfImpl : public DomainBase<VariantOfImpl<T, Inner...>> {
public:
using value_type = T;
static constexpr bool has_custom_corpus_type = true;
// `T` might be a custom variant type.
// We use std::variant unconditionally to make it simpler.
using corpus_type = std::variant<corpus_type_t<Inner>...>;
VariantOfImpl() = default;
explicit VariantOfImpl(std::in_place_t, Inner... inner)
: inner_(std::move(inner)...) {}
corpus_type Init(absl::BitGenRef prng) {
return Switch<sizeof...(Inner)>(
absl::Uniform(prng, size_t{}, sizeof...(Inner)), [&](auto I) {
return corpus_type(std::in_place_index<I>,
std::get<I>(inner_).Init(prng));
});
}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
// Flip a coin to choose between generating a value of an alternative type
// and mutating the value of the current type. Assign more weight to the
// mutating case in order to explore more on a given type before we start
// from scratch again.
if (absl::Bernoulli(prng, 0.2)) {
val = Init(prng);
} else {
Switch<sizeof...(Inner)>(val.index(), [&](auto I) {
std::get<I>(inner_).Mutate(std::get<I>(val), prng, only_shrink);
});
}
}
auto GetPrinter() const { return VariantPrinter<Inner...>{inner_}; }
value_type GetValue(const corpus_type& v) const {
return Switch<sizeof...(Inner)>(v.index(), [&](auto I) -> value_type {
value_type out;
out.template emplace<I>(std::get<I>(inner_).GetValue(std::get<I>(v)));
return out;
});
}
std::optional<corpus_type> FromValue(const value_type& v) const {
return Switch<sizeof...(Inner)>(
v.index(), [&](auto I) -> std::optional<corpus_type> {
if (auto inner_value =
std::get<I>(inner_).FromValue(std::get<I>(v))) {
return corpus_type(std::in_place_index<I>, *std::move(inner_value));
} else {
return std::nullopt;
}
});
}
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
return ParseWithDomainVariant(inner_, obj);
}
IRObject SerializeCorpus(const corpus_type& v) const {
return SerializeWithDomainVariant(inner_, v);
}
private:
std::tuple<Inner...> inner_;
};
template <typename... T>
class ArbitraryImpl<std::variant<T...>>
: public VariantOfImpl<std::variant<T...>, ArbitraryImpl<T>...> {};
enum class OptionalPolicy { kWithNull, kWithoutNull, kAlwaysNull };
template <typename T, typename InnerDomain>
class OptionalOfImpl : public DomainBase<OptionalOfImpl<T, InnerDomain>> {
public:
using value_type = T;
static_assert(Requires<T>([](auto x) -> decltype(!x, *x) {}),
"T must be an optional type.");
static constexpr bool has_custom_corpus_type = true;
// `T` might be a custom optional type.
// We use std::variant unconditionally to make it simpler.
using corpus_type = std::variant<std::monostate, corpus_type_t<InnerDomain>>;
explicit OptionalOfImpl(InnerDomain inner)
: inner_(std::move(inner)), policy_(OptionalPolicy::kWithNull) {}
corpus_type Init(absl::BitGenRef prng) {
if (policy_ == OptionalPolicy::kAlwaysNull ||
// 1/2 chance of returning an empty to avoid initialization with large
// entities for recursive data structures. See
// ContainerOfImplBase::ChooseRandomSize for more details.
(policy_ == OptionalPolicy::kWithNull && absl::Bernoulli(prng, 0.5))) {
return corpus_type(std::in_place_index<0>);
} else {
return corpus_type(std::in_place_index<1>, inner_.Init(prng));
}
}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
if (policy_ == OptionalPolicy::kAlwaysNull) {
val.template emplace<0>();
return;
}
const bool has_value = val.index() == 1;
if (!has_value) {
// Only add a value if we are not shrinking.
if (!only_shrink) val.template emplace<1>(inner_.Init(prng));
} else if (policy_ == OptionalPolicy::kWithNull &&
absl::Bernoulli(prng, 1. / 100)) {
// 1/100 chance of returning an empty.
val.template emplace<0>();
} else {
inner_.Mutate(std::get<1>(val), prng, only_shrink);
}
}
auto GetPrinter() const {
return OptionalPrinter<OptionalOfImpl, InnerDomain>{*this, inner_};
}
value_type GetValue(const corpus_type& v) const {
if (v.index() == 0) {
FUZZTEST_INTERNAL_CHECK(policy_ != OptionalPolicy::kWithoutNull,
"Value cannot be null!");
return value_type();
}
FUZZTEST_INTERNAL_CHECK(policy_ != OptionalPolicy::kAlwaysNull,
"Value cannot be non-null!");
return value_type(inner_.GetValue(std::get<1>(v)));
}
std::optional<corpus_type> FromValue(const value_type& v) const {
if (!v) {
FUZZTEST_INTERNAL_CHECK(policy_ != OptionalPolicy::kWithoutNull,
"Value cannot be null!");
return corpus_type(std::in_place_index<0>);
}
FUZZTEST_INTERNAL_CHECK(policy_ != OptionalPolicy::kAlwaysNull,
"Value cannot be non-null!");
if (auto inner_value = inner_.FromValue(*v)) {
return corpus_type(std::in_place_index<1>, *std::move(inner_value));
} else {
return std::nullopt;
}
}
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
std::optional<corpus_type> result = ParseWithDomainOptional(inner_, obj);
if (!result.has_value()) {
return std::nullopt;
}
bool is_null = std::get_if<std::monostate>(&(*result));
if ((is_null && policy_ == OptionalPolicy::kWithoutNull) ||
(!is_null && policy_ == OptionalPolicy::kAlwaysNull)) {
return std::nullopt;
}
return result;
}
IRObject SerializeCorpus(const corpus_type& v) const {
return SerializeWithDomainOptional(inner_, v);
}
OptionalOfImpl& SetAlwaysNull() {
policy_ = OptionalPolicy::kAlwaysNull;
return *this;
}
OptionalOfImpl& SetWithoutNull() {
policy_ = OptionalPolicy::kWithoutNull;
return *this;
}
uint64_t CountNumberOfFields(const corpus_type& val) {
if (val.index() == 1) {
return inner_.CountNumberOfFields(std::get<1>(val));
}
return 0;
}
uint64_t MutateSelectedField(corpus_type& val, absl::BitGenRef prng,
bool only_shrink,
uint64_t selected_field_index) {
if (val.index() == 1) {
return inner_.MutateSelectedField(std::get<1>(val), prng, only_shrink,
selected_field_index);
}
return 0;
}
InnerDomain Inner() const { return inner_; }
private:
InnerDomain inner_;
OptionalPolicy policy_;
};
template <typename T, typename Inner>
class SmartPointerOfImpl : public DomainBase<SmartPointerOfImpl<T, Inner>> {
// We use the type erased version here to allow for recursion in smart pointer
// domains.
// It helps cut the recursion in type traits (like corpus_type) and the
// indirection avoids having the domain contain itself by value.
using RealInner = Domain<typename T::element_type>;
using InnerFn = const RealInner& (*)();
public:
using value_type = T;
static constexpr bool has_custom_corpus_type = true;
using corpus_type =
std::variant<std::monostate, typename RealInner::corpus_type>;
// Since we allow for recursion in this domain, we want to delay the
// construction of the inner domain. Otherwise we would have an infinite
// recursion of domains being created.
explicit SmartPointerOfImpl(InnerFn fn) : inner_(fn) {}
explicit SmartPointerOfImpl(Inner inner) : inner_(std::move(inner)) {}
corpus_type Init(absl::BitGenRef) {
// Init will always have an empty smart pointer to reduce nesting.
// Otherwise it is very easy to get a stack overflow during Init() when
// there is recursion in the domains.
return corpus_type();
}
void Mutate(corpus_type& val, absl::BitGenRef prng, bool only_shrink) {
const bool has_value = val.index() == 1;
if (!has_value) {
// Only add a value if we are not shrinking.
if (!only_shrink) val.template emplace<1>(GetOrMakeInner().Init(prng));
} else if (absl::Bernoulli(prng, 1. / 100)) {
// 1/100 chance of returning an empty.
val.template emplace<0>();
} else {
GetOrMakeInner().Mutate(std::get<1>(val), prng, only_shrink);
}
}
auto GetPrinter() const {
return OptionalPrinter<SmartPointerOfImpl, RealInner>{
*this, GetOrMakeInnerConst()};
}
value_type GetValue(const corpus_type& v) const {
if (v.index() == 0) return value_type();
return value_type(new auto(GetOrMakeInnerConst().GetValue(std::get<1>(v))));
}
std::optional<corpus_type> FromValue(const value_type& v) const {
if (!v) return corpus_type(std::in_place_index<0>);
if (auto inner_value = GetOrMakeInnerConst().FromValue(*v)) {
return corpus_type(std::in_place_index<1>, *std::move(inner_value));
} else {
return std::nullopt;
}
}
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
return ParseWithDomainOptional(GetOrMakeInnerConst(), obj);
}
IRObject SerializeCorpus(const corpus_type& v) const {
return SerializeWithDomainOptional(GetOrMakeInnerConst(), v);
}
private:
RealInner& GetOrMakeInner() {
if (inner_.index() == 0) {
inner_.template emplace<1>(std::get<0>(inner_)());
}
return std::get<1>(inner_);
}
const RealInner& GetOrMakeInnerConst() const {
return inner_.index() == 0 ? std::get<0>(inner_)() : std::get<1>(inner_);
}
// We don't construct it eagerly to avoid an infinite recursion during
// default construction. We only construct the sub domain on demand.
std::variant<InnerFn, RealInner> inner_;
};
template <typename T>
const Domain<T>& GetGlobalDomainDefaultInstance() {
static const auto* instance = new Domain<T>(ArbitraryImpl<T>());
return *instance;
}
template <typename T>
class ArbitraryImpl<std::optional<T>>
: public OptionalOfImpl<std::optional<T>, ArbitraryImpl<T>> {
public:
ArbitraryImpl() : ArbitraryImpl::OptionalOfImpl(ArbitraryImpl<T>()) {}
};
template <typename T>
class ArbitraryImpl<std::unique_ptr<T>>
: public SmartPointerOfImpl<std::unique_ptr<T>, ArbitraryImpl<T>> {
public:
ArbitraryImpl()
: ArbitraryImpl::SmartPointerOfImpl(GetGlobalDomainDefaultInstance) {}
};
template <typename T>
class ArbitraryImpl<std::shared_ptr<T>>
: public SmartPointerOfImpl<std::shared_ptr<T>, ArbitraryImpl<T>> {
public:
ArbitraryImpl()
: ArbitraryImpl::SmartPointerOfImpl(GetGlobalDomainDefaultInstance) {}
};
template <typename Mapper, typename... Inner>
class MapImpl
: public DomainBase<MapImpl<Mapper, Inner...>,
std::decay_t<std::invoke_result_t<
Mapper, const typename Inner::value_type&...>>> {