blob: 018691dcd6a054a0fcd4f4662e91184649a61561 [file] [log] [blame]
// Copyright 2009 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#ifndef RE2_PREFILTER_H_
#define RE2_PREFILTER_H_
// Prefilter is the class used to extract string guards from regexps.
// Rather than using Prefilter class directly, use FilteredRE2.
// See filtered_re2.h
#include <set>
#include <string>
#include <vector>
#include "util/logging.h"
namespace re2 {
class RE2;
class Regexp;
class Prefilter {
// Instead of using Prefilter directly, use FilteredRE2; see filtered_re2.h
enum Op {
ALL = 0, // Everything matches
NONE, // Nothing matches
ATOM, // The string atom() must match
AND, // All in subs() must match
OR, // One of subs() must match
explicit Prefilter(Op op);
Op op() { return op_; }
const std::string& atom() const { return atom_; }
void set_unique_id(int id) { unique_id_ = id; }
int unique_id() const { return unique_id_; }
// The children of the Prefilter node.
std::vector<Prefilter*>* subs() {
DCHECK(op_ == AND || op_ == OR);
return subs_;
// Set the children vector. Prefilter takes ownership of subs and
// subs_ will be deleted when Prefilter is deleted.
void set_subs(std::vector<Prefilter*>* subs) { subs_ = subs; }
// Given a RE2, return a Prefilter. The caller takes ownership of
// the Prefilter and should deallocate it. Returns NULL if Prefilter
// cannot be formed.
static Prefilter* FromRE2(const RE2* re2);
// Returns a readable debug string of the prefilter.
std::string DebugString() const;
template <typename H>
friend H AbslHashValue(H h, const Prefilter& a) {
h = H::combine(std::move(h), a.op_);
if (a.op_ == ATOM) {
h = H::combine(std::move(h), a.atom_);
} else if (a.op_ == AND || a.op_ == OR) {
h = H::combine(std::move(h), a.subs_->size());
for (size_t i = 0; i < a.subs_->size(); ++i) {
h = H::combine(std::move(h), (*a.subs_)[i]->unique_id_);
return h;
friend bool operator==(const Prefilter& a, const Prefilter& b) {
if (&a == &b) {
return true;
if (a.op_ != b.op_) {
return false;
if (a.op_ == ATOM) {
if (a.atom_ != b.atom_) {
return false;
} else if (a.op_ == AND || a.op_ == OR) {
if (a.subs_->size() != b.subs_->size()) {
return false;
for (size_t i = 0; i < a.subs_->size(); ++i) {
if ((*a.subs_)[i]->unique_id_ != (*b.subs_)[i]->unique_id_) {
return false;
return true;
// A comparator used to store exact strings. We compare by length,
// then lexicographically. This ordering makes it easier to reduce the
// set of strings in SimplifyStringSet.
struct LengthThenLex {
bool operator()(const std::string& a, const std::string& b) const {
return (a.size() < b.size()) || (a.size() == b.size() && a < b);
class Info;
using SSet = std::set<std::string, LengthThenLex>;
using SSIter = SSet::iterator;
using ConstSSIter = SSet::const_iterator;
// Combines two prefilters together to create an AND. The passed
// Prefilters will be part of the returned Prefilter or deleted.
static Prefilter* And(Prefilter* a, Prefilter* b);
// Combines two prefilters together to create an OR. The passed
// Prefilters will be part of the returned Prefilter or deleted.
static Prefilter* Or(Prefilter* a, Prefilter* b);
// Generalized And/Or
static Prefilter* AndOr(Op op, Prefilter* a, Prefilter* b);
static Prefilter* FromRegexp(Regexp* a);
static Prefilter* FromString(const std::string& str);
static Prefilter* OrStrings(SSet* ss);
static Info* BuildInfo(Regexp* re);
Prefilter* Simplify();
// Removes redundant strings from the set. A string is redundant if
// any of the other strings appear as a substring. The empty string
// is a special case, which is ignored.
static void SimplifyStringSet(SSet* ss);
// Adds the cross-product of a and b to dst.
// (For each string i in a and j in b, add i+j.)
static void CrossProduct(const SSet& a, const SSet& b, SSet* dst);
// Kind of Prefilter.
Op op_;
// Sub-matches for AND or OR Prefilter.
std::vector<Prefilter*>* subs_;
// Actual string to match in leaf node.
std::string atom_;
// If different prefilters have the same string atom, or if they are
// structurally the same (e.g., OR of same atom strings) they are
// considered the same unique nodes. This is the id for each unique
// node. This field is populated with a unique id for every node,
// and -1 for duplicate nodes.
int unique_id_;
Prefilter(const Prefilter&) = delete;
Prefilter& operator=(const Prefilter&) = delete;
} // namespace re2
#endif // RE2_PREFILTER_H_