Fuzz `RE2::Set` and `FilteredRE2`.
Change-Id: Ief0a26ce80211e444580c0a03528d678081e4bef
Reviewed-on: https://code-review.googlesource.com/c/re2/+/61050
Reviewed-by: Alex Chernyakhovsky <achernya@google.com>
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index f2863b9..b39ea3d 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -9,8 +9,10 @@
#include <string>
#include <vector>
+#include "re2/filtered_re2.h"
#include "re2/re2.h"
#include "re2/regexp.h"
+#include "re2/set.h"
#include "re2/walker-inl.h"
using re2::StringPiece;
@@ -96,7 +98,7 @@
};
void TestOneInput(StringPiece pattern, const RE2::Options& options,
- StringPiece text) {
+ RE2::Anchor anchor, StringPiece text) {
// Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
// Otherwise, we will waste time on inputs that have long runs of various
// character classes. The fuzzer has shown itself to be easily capable of
@@ -209,6 +211,29 @@
dummy += re.NamedCapturingGroups().size();
dummy += re.CapturingGroupNames().size();
dummy += RE2::QuoteMeta(pattern).size();
+
+ RE2::Set set(options, anchor);
+ int index = set.Add(pattern, /*error=*/NULL); // -1 on error
+ if (index != -1 && set.Compile()) {
+ std::vector<int> matches;
+ set.Match(text, &matches);
+ }
+
+ re2::FilteredRE2 filter;
+ index = -1; // not clobbered on error
+ filter.Add(pattern, options, &index);
+ if (index != -1) {
+ std::vector<std::string> atoms;
+ filter.Compile(&atoms);
+ // Pretend that all atoms match, which
+ // triggers the AND-OR tree maximally.
+ std::vector<int> matched_atoms;
+ matched_atoms.reserve(atoms.size());
+ for (size_t i = 0; i < atoms.size(); ++i)
+ matched_atoms.push_back(static_cast<int>(i));
+ std::vector<int> matches;
+ filter.AllMatches(text, matched_atoms, &matches);
+ }
}
// Entry point for libFuzzer.
@@ -242,9 +267,17 @@
options.set_word_boundary(fdp.ConsumeBool());
options.set_one_line(fdp.ConsumeBool());
+ // ConsumeEnum<RE2::Anchor>() would require RE2::Anchor to specify
+ // kMaxValue, so just use PickValueInArray<RE2::Anchor>() instead.
+ RE2::Anchor anchor = fdp.PickValueInArray<RE2::Anchor>({
+ RE2::UNANCHORED,
+ RE2::ANCHOR_START,
+ RE2::ANCHOR_BOTH,
+ });
+
std::string pattern = fdp.ConsumeRandomLengthString(999);
std::string text = fdp.ConsumeRandomLengthString(999);
- TestOneInput(pattern, options, text);
+ TestOneInput(pattern, options, anchor, text);
return 0;
}
diff --git a/util/fuzz.cc b/util/fuzz.cc
index 9cac118..725cad8 100644
--- a/util/fuzz.cc
+++ b/util/fuzz.cc
@@ -10,12 +10,12 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
int main(int argc, char** argv) {
- uint8_t data[32];
- for (int i = 0; i < 32; i++) {
- for (int j = 0; j < 32; j++) {
+ uint8_t data[4096];  // scratch buffer that is refilled and handed to the fuzz target
+ for (int i = 0; i < 4096; i++) {  // run the fill-and-call cycle 4096 times
+ for (int j = 0; j < 4096; j++) {  // overwrite every byte of data before each call
data[j] = random() & 0xFF;
}
- LLVMFuzzerTestOneInput(data, 32);
+ LLVMFuzzerTestOneInput(data, 4096);  // pass the entire buffer, not a prefix of it
}
return 0;
}