Serialize strings directly instead of wrapping them in the FuzzTest format.
PiperOrigin-RevId: 506433409
diff --git a/e2e_tests/functional_test.cc b/e2e_tests/functional_test.cc
index e4b5694..e81379c 100644
--- a/e2e_tests/functional_test.cc
+++ b/e2e_tests/functional_test.cc
@@ -795,7 +795,7 @@
auto replay_files = ReadFileOrDirectory(out_dir.dirname());
ASSERT_EQ(replay_files.size(), 1) << std_err;
- auto parsed = IRObject::FromString(replay_files[0].data);
+ auto parsed = IRObject::FromString(replay_files[0].data, /*directly=*/true);
ASSERT_TRUE(parsed) << std_err;
auto args = parsed->ToCorpus<std::tuple<std::string>>();
EXPECT_THAT(args, Optional(FieldsAre(StartsWith("Fuzz")))) << std_err;
@@ -930,12 +930,22 @@
minimized_corpus_files.size() + 2)))));
}
+template <typename T>
+constexpr bool is_nested_string = false;  // Default: not a nested string.
+// is_nested_string<T> holds for std::string and singleton tuples around it.
+template <>
+constexpr bool is_nested_string<std::string> = true;
+// Recursive case: std::tuple<T> qualifies exactly when T does.
+template <typename T>
+constexpr bool is_nested_string<std::tuple<T>> = is_nested_string<T>;
+
class ReplayFile {
public:
template <typename T>
ReplayFile(std::in_place_t, const T& corpus) {
filename_ = absl::StrCat(dir_.dirname(), "/replay_file");
- WriteFile(filename_, internal::IRObject::FromCorpus(corpus).ToString());
+ WriteFile(filename_, internal::IRObject::FromCorpus(corpus).ToString(
+ /*directly=*/is_nested_string<T>));
}
auto GetReplayEnv() const {
@@ -1094,7 +1104,7 @@
auto replay_files = ReadFileOrDirectory(out_dir.dirname());
ASSERT_EQ(replay_files.size(), 1) << std_err;
- auto parsed = IRObject::FromString(replay_files[0].data);
+ auto parsed = IRObject::FromString(replay_files[0].data, /*directly=*/true);
ASSERT_TRUE(parsed) << std_err;
auto args = parsed->ToCorpus<std::tuple<std::string>>();
ASSERT_THAT(args, Optional(FieldsAre(HasSubstr("X"))));
diff --git a/fuzztest/BUILD b/fuzztest/BUILD
index d564647..6769096 100644
--- a/fuzztest/BUILD
+++ b/fuzztest/BUILD
@@ -348,6 +348,7 @@
srcs = ["internal/serialization.cc"],
hdrs = ["internal/serialization.h"],
deps = [
+ ":logging",
":meta",
"@com_google_absl//absl/numeric:int128",
"@com_google_absl//absl/strings",
diff --git a/fuzztest/domain.h b/fuzztest/domain.h
index 1cb121c..0a73dc1 100644
--- a/fuzztest/domain.h
+++ b/fuzztest/domain.h
@@ -63,6 +63,7 @@
using value_type = T;
using corpus_type = internal::GenericDomainCorpusType;
static constexpr bool has_custom_corpus_type = true;
+ static constexpr bool is_directly_serializable = false;
template <typename Inner>
Domain(const internal::DomainBase<Inner, T>& inner)
diff --git a/fuzztest/internal/domain.h b/fuzztest/internal/domain.h
index f2e8d4a..a143302 100644
--- a/fuzztest/internal/domain.h
+++ b/fuzztest/internal/domain.h
@@ -104,6 +104,10 @@
const GenericDomainCorpusType& val, absl::FormatRawSink out,
internal::PrintMode mode,
std::optional<int> tuple_elem = std::nullopt) const = 0;
+
+ // Returns true if the corpus value should be serialized directly as a string
+ // instead of in the FuzzTest format.
+ virtual bool IsDirectlySerializable() const = 0;
};
// A typed subinterface that provides the methods to handle `value_type`
@@ -194,6 +198,10 @@
v.template GetAs<corpus_type_t<Derived>>());
}
+ bool IsDirectlySerializable() const final {
+ return Derived::is_directly_serializable;  // Static flag of the domain.
+ }
+
uint64_t UntypedCountNumberOfFields(const GenericDomainCorpusType& v) final {
return derived().CountNumberOfFields(v.GetAs<corpus_type_t<Derived>>());
}
@@ -256,6 +264,10 @@
static constexpr bool has_custom_corpus_type = false;
+ // Whether corpus values are serialized directly as a string, bypassing the
+ // intermediate serialization format. Domains may shadow this default.
+ static constexpr bool is_directly_serializable = false;
+
private:
Derived& derived() { return static_cast<Derived&>(*this); }
const Derived& derived() const { return static_cast<const Derived&>(*this); }
@@ -718,6 +730,11 @@
auto SerializeWithDomainTuple(
const std::tuple<Domain...>& domains,
const std::tuple<corpus_type_t<Domain>...>& corpus) {
+ // Flatten singleton tuples to enable direct serialization of
+ // directly-serializable domains nested in singleton tuples.
+ if constexpr (sizeof...(Domain) == 1) {
+ return std::get<0>(domains).SerializeCorpus(std::get<0>(corpus));
+ }
IRObject obj;
auto& subs = obj.MutableSubs();
ApplyIndex<sizeof...(Domain)>([&](auto... I) {
@@ -731,16 +748,25 @@
template <typename... Domain>
std::optional<std::tuple<corpus_type_t<Domain>...>> ParseWithDomainTuple(
const std::tuple<Domain...>& domains, const IRObject& obj, int skip = 0) {
+ auto parse_subs = [&](absl::Span<const IRObject> subs) {  // all-or-nothing
+ return ApplyIndex<sizeof...(Domain)>([&](auto... I) {
+ return [](auto... opts) {
+ return (!opts || ...)
+ ? std::nullopt
+ : std::optional(std::tuple<corpus_type_t<Domain>...>{
+ *std::move(opts)...});
+ }(std::get<I>(domains).ParseCorpus((subs)[I + skip])...);
+ });
+ };
+ // Reverse the flattening of singleton tuples done by
+ // SerializeWithDomainTuple(): view `obj` itself as the only subobject (a
+ // one-element span over it, no copy), wrapping the parsed result in a tuple.
+ if (sizeof...(Domain) == 1 && skip == 0) {
+ return parse_subs(absl::MakeConstSpan(&obj, 1));
+ }
auto subs = obj.Subs();
if (!subs || subs->size() != sizeof...(Domain) + skip) return std::nullopt;
- return ApplyIndex<sizeof...(Domain)>([&](auto... I) {
- return [](auto... opts) {
- return (!opts || ...)
- ? std::nullopt
- : std::optional(std::tuple<corpus_type_t<Domain>...>{
- *std::move(opts)...});
- }(std::get<I>(domains).ParseCorpus((*subs)[I + skip])...);
- });
+ return parse_subs(*subs);
}
template <typename T, typename = void>
@@ -950,6 +976,9 @@
InnerDomainT::has_custom_corpus_type;
// `corpus_type` might be immutable (eg std::pair<const int, int> for maps
// inner domain). We store them in a std::list to allow for this.
+ using corpus_type =
+ std::conditional_t<has_custom_corpus_type,
+ std::list<corpus_type_t<InnerDomainT>>, value_type>;
// Some container mutation only applies to vector or string types which do
// not have a custom corpus type.
@@ -957,6 +986,15 @@
!has_custom_corpus_type &&
(is_vector_v<value_type> || std::is_same_v<value_type, std::string>);
+ static constexpr bool is_vector_of_uint8_or_char =
+ !has_custom_corpus_type && is_vector_v<value_type> &&
+ (std::is_same_v<ExtractTemplateParameter<0, value_type>, uint8_t> ||
+ std::is_same_v<ExtractTemplateParameter<0, value_type>, char>);
+ // Strings and uint8_t/char vectors are serialized directly as strings.
+ static constexpr bool is_directly_serializable =
+ !has_custom_corpus_type &&
+ (is_vector_of_uint8_or_char || std::is_same_v<value_type, std::string>);
+
// The current implementation of container dictionary only supports
// vector or string container value_type, whose InnerDomain is
// an `ArbitraryImpl<T2>` where T2 is an integral type.
@@ -964,10 +1002,6 @@
is_memory_dictionary_compatible<InnerDomainT>::value &&
is_vector_or_string;
- using corpus_type =
- std::conditional_t<has_custom_corpus_type,
- std::list<corpus_type_t<InnerDomainT>>, value_type>;
-
// If `!container_has_memory_dict`, dict_type is a bool and dict
// is not used. This conditional_t may be neccessary because some
// value_type may not have copy constructors(for example, proto).
@@ -1169,6 +1203,8 @@
subs.push_back(inner_.SerializeCorpus(elem));
}
return obj;
+ } else if constexpr (is_vector_of_uint8_or_char) {
+ return IRObject::FromCorpus(std::string{v.begin(), v.end()});
} else {
return IRObject::FromCorpus(v);
}
@@ -1224,6 +1260,10 @@
}
}
return res;
+ } else if constexpr (is_vector_of_uint8_or_char) {
+ std::optional<std::string> str = obj.ToCorpus<std::string>();
+ if (!str.has_value()) return std::nullopt;
+ return corpus_type{str->begin(), str->end()};
} else {
return obj.ToCorpus<corpus_type>();
}
@@ -2029,6 +2069,14 @@
enum class RequireCustomCorpusType { kNo, kYes };
+// Whether the first domain in the pack is directly serializable.
+template <typename... Domains>
+inline constexpr bool first_is_directly_serializable = false;  // Empty pack.
+// Non-empty pack: forward the flag of the leading domain.
+template <typename Domain, typename... Domains>
+inline constexpr bool first_is_directly_serializable<Domain, Domains...> =
+ Domain::is_directly_serializable;
+
template <typename T, RequireCustomCorpusType require_custom, typename... Inner>
class AggregateOfImpl
: public DomainBase<AggregateOfImpl<T, require_custom, Inner...>, T> {
@@ -2043,6 +2091,10 @@
std::conditional_t<has_custom_corpus_type,
std::tuple<corpus_type_t<Inner>...>, T>;
+ static constexpr bool is_directly_serializable =
+ require_custom == RequireCustomCorpusType::kNo && sizeof...(Inner) == 1 &&
+ first_is_directly_serializable<Inner...>;  // Flag of the sole inner domain.
+
AggregateOfImpl() = default;
explicit AggregateOfImpl(std::in_place_t, Inner... inner)
: inner_(std::move(inner)...) {}
@@ -2872,6 +2924,7 @@
// out-of-bounds bugs.
using corpus_type = std::vector<Char>;
static constexpr bool has_custom_corpus_type = true;
+ static constexpr bool is_directly_serializable = true;
corpus_type Init(absl::BitGenRef prng) { return inner_.Init(prng); }
@@ -2894,11 +2947,13 @@
}
std::optional<corpus_type> ParseCorpus(const IRObject& obj) const {
- return obj.ToCorpus<corpus_type>();
+ std::optional<std::string> str = obj.ToCorpus<std::string>();
+ if (!str.has_value()) return std::nullopt;
+ return corpus_type{str->begin(), str->end()};
}
IRObject SerializeCorpus(const corpus_type& v) const {
- return IRObject::FromCorpus(v);
+ return IRObject::FromCorpus(std::string{v.begin(), v.end()});
}
private:
diff --git a/fuzztest/internal/runtime.cc b/fuzztest/internal/runtime.cc
index c9c3ccd..216d5df 100644
--- a/fuzztest/internal/runtime.cc
+++ b/fuzztest/internal/runtime.cc
@@ -66,7 +66,7 @@
void Runtime::DumpReproducer(std::string_view outdir) const {
const std::string content =
current_args_->domain.UntypedSerializeCorpus(current_args_->corpus_value)
- .ToString();
+ .ToString(current_args_->domain.IsDirectlySerializable());
const std::string filename = WriteDataToDir(content, outdir);
if (filename.empty()) {
@@ -291,7 +291,8 @@
}
std::optional<corpus_type> FuzzTestFuzzerImpl::TryParse(std::string_view data) {
- if (auto parsed = IRObject::FromString(data)) {
+ if (auto parsed = IRObject::FromString(
+ data, params_domain_->IsDirectlySerializable())) {
return params_domain_->UntypedParseCorpus(*parsed);
}
return std::nullopt;
@@ -311,7 +312,8 @@
PRNG prng(seed_sequence_);
const auto original_serialized =
- params_domain_->UntypedSerializeCorpus(*to_minimize).ToString();
+ params_domain_->UntypedSerializeCorpus(*to_minimize)
+ .ToString(params_domain_->IsDirectlySerializable());
// In minimize mode we keep mutating the given reproducer value with
// `only_shrink=true` until we crash. We drop mutations that don't
@@ -329,8 +331,8 @@
num_mutations = std::max(1, num_mutations - 1);
// We compare the serialized version. Not very efficient but works for
// now.
- if (params_domain_->UntypedSerializeCorpus(copy).ToString() ==
- original_serialized)
+ if (params_domain_->UntypedSerializeCorpus(copy).ToString(
+ params_domain_->IsDirectlySerializable()) == original_serialized)
continue;
RunOneInput({std::move(copy)});
}
@@ -517,9 +519,9 @@
void FuzzTestFuzzerImpl::TryWriteCorpusFile(const Input& input) {
if (corpus_out_dir_.empty()) return;
- if (WriteDataToDir(
- params_domain_->UntypedSerializeCorpus(input.args).ToString(),
- corpus_out_dir_)
+ if (WriteDataToDir(params_domain_->UntypedSerializeCorpus(input.args)
+ .ToString(params_domain_->IsDirectlySerializable()),
+ corpus_out_dir_)
.empty()) {
absl::FPrintF(GetStderr(), "[!] Failed to write corpus file.\n");
}
@@ -666,8 +668,9 @@
// minimized anymore.
if (params_domain_
->UntypedSerializeCorpus(minimal_non_fatal_counterexample_->args)
- .ToString() !=
- params_domain_->UntypedSerializeCorpus(copy.args).ToString()) {
+ .ToString(params_domain_->IsDirectlySerializable()) !=
+ params_domain_->UntypedSerializeCorpus(copy.args).ToString(
+ params_domain_->IsDirectlySerializable())) {
runtime_.SetExternalFailureDetected(false);
RunOneInput(copy);
if (runtime_.external_failure_detected()) {
diff --git a/fuzztest/internal/serialization.cc b/fuzztest/internal/serialization.cc
index 587f5f3..6138968 100644
--- a/fuzztest/internal/serialization.cc
+++ b/fuzztest/internal/serialization.cc
@@ -27,6 +27,7 @@
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
+#include "./fuzztest/internal/logging.h"
namespace fuzztest::internal {
@@ -173,13 +174,21 @@
} // namespace
-std::string IRObject::ToString() const {
+std::string IRObject::ToString(bool directly) const {
+ if (directly) {  // Raw mode: return the held string as-is, without header.
+ FUZZTEST_INTERNAL_CHECK_PRECONDITION(
+ std::holds_alternative<std::string>(value),
+ "IRObject must hold std::string to be directly serializable.");
+ return std::get<std::string>(value);
+ }
std::string out = absl::StrCat(AsAbsl(kHeader), "\n");
std::visit(OutputVisitor{value.index(), 0, out}, value);
return out;
}
-std::optional<IRObject> IRObject::FromString(std::string_view str) {
+std::optional<IRObject> IRObject::FromString(std::string_view str,
+ bool directly) {
+ if (directly) return IRObject(std::string(str));
IRObject object;
if (ReadToken(str) != kHeader) return std::nullopt;
if (!ParseImpl(object, str) || !ReadToken(str).empty()) return std::nullopt;
diff --git a/fuzztest/internal/serialization.h b/fuzztest/internal/serialization.h
index 3f91d9f..1a7b51b 100644
--- a/fuzztest/internal/serialization.h
+++ b/fuzztest/internal/serialization.h
@@ -182,6 +182,11 @@
return obj;
} else {
// Must be a tuple like object.
+ // If it's a singleton tuple, flatten it to enable direct serialization of
+ // types like std::tuple<std::string>.
+ if constexpr (std::tuple_size_v<T> == 1) {
+ return FromCorpus(std::get<0>(value));
+ }
return std::apply(
[](const auto&... elem) {
IRObject obj;
@@ -245,6 +250,12 @@
return out;
} else {
// Must be a tuple like object.
+ // If it's a singleton tuple, reverse the flattening done in FromCorpus.
+ if constexpr (std::tuple_size_v<T> == 1) {
+ auto part = ToCorpus<std::tuple_element_t<0, T>>();
+ if (!part.has_value()) return std::nullopt;
+ return T{*std::move(part)};
+ }
auto elems = Subs();
if (!elems || elems->size() != std::tuple_size_v<T>) return std::nullopt;
auto it = elems->begin();
@@ -260,10 +271,15 @@
}
}
- // Serialize the object as a string. This is used to persist the object on
- // files for reproducing bugs later.
- std::string ToString() const;
- static std::optional<IRObject> FromString(std::string_view str);
+ // Serializes the object as a string. This is used to persist the object in a
+ // file for reproducing bugs later. If `directly` is true, the object must
+ // hold a single string value (checked), which is then returned verbatim.
+ std::string ToString(bool directly = false) const;
+
+ // Deserializes `str` into an IRObject. If `directly` is true, always returns
+ // an IRObject that contains `str` as a single string value.
+ static std::optional<IRObject> FromString(std::string_view str,
+ bool directly = false);
private:
template <typename T>