blob: 8032fa9ca20b98d381a64cab7fa74861d74db1e4 [file] [log] [blame]
// Copyright 2023 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_
#define THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include "absl/container/flat_hash_set.h"
#include "./centipede/coverage.h"
#include "./centipede/defs.h"
#include "./centipede/feature.h"
#include "./centipede/util.h"
namespace centipede {
// Set of features with their frequencies.
// Features that have a frequency >= frequency_threshold
// are considered too frequent and thus less interesting for further fuzzing.
// All features must be in [0, feature_domains::kLastDomain.begin()).
class FeatureSet {
public:
explicit FeatureSet(uint8_t frequency_threshold)
: frequency_threshold_(frequency_threshold) {}
// Returns the number of features in `features` not present in `this`.
// Removes all features from `features` that are too frequent.
size_t CountUnseenAndPruneFrequentFeatures(FeatureVec &features) const;
// For every feature in `features` increment its frequency.
// If a feature wasn't seen before, it is added to `this`.
void IncrementFrequencies(const FeatureVec &features);
// How many different features are in the set.
size_t size() const { return num_features_; }
// Returns features that originate from CFG counters, converted to PCIndexVec.
PCIndexVec ToCoveragePCs() const;
// Returns the number of features in `this` from the given feature domain.
size_t CountFeatures(feature_domains::Domain domain);
// Returns the frequency associated with `feature`.
size_t Frequency(feature_t feature) const { return frequencies_[feature]; }
// Computes combined weight of `features`.
// The less frequent the feature is, the bigger its weight.
// The weight of a FeatureVec is a sum of individual feature weights.
uint64_t ComputeWeight(const FeatureVec &features) const;
// Returns a debug string representing the state of *this.
std::string DebugString() const;
private:
// Computes the frequency threshold based on the domain of `feature`.
// For now, just uses 1 for kPCPair and frequency_threshold_ for all others.
// Rationale: the kPCPair features might be too numerous, we don't want to
// store more than one of each such feature in the corpus.
uint8_t FrequencyThreshold(feature_t feature) const {
if (feature_domains::kPCPair.Contains(feature)) return 1;
return frequency_threshold_;
}
const uint8_t frequency_threshold_;
static constexpr size_t kSize = feature_domains::kLastDomain.begin();
// Maps features to their frequencies.
// This array is huge but sparse, and depending on the enabled features
// some parts of it will never be written to or read from.
// Unused parts of MmapNoReserveArray don't actually reserve memory.
MmapNoReserveArray<kSize> frequencies_;
// Counts all unique features added to this.
size_t num_features_ = 0;
// Counts features in each domain.
size_t features_per_domain_[feature_domains::kLastDomain.domain_id()] = {};
// Maintains the set of PC indices that correspond to added features.
absl::flat_hash_set<PCIndex> pc_index_set_;
};
} // namespace centipede
#endif // THIRD_PARTY_CENTIPEDE_FEATURE_SET_H_