blob: 25318880551bdbb7612fb4b71d56d84727b976ef [file] [log] [blame]
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_
#define THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <string_view>
#include <vector>
#include "./centipede/defs.h"
#include "./centipede/execution_metadata.h"
#include "./centipede/knobs.h"
#include "./centipede/mutation_input.h"
namespace centipede {
// A simple class representing an array of up to kMaxEntrySize bytes.
class DictEntry {
public:
static constexpr uint8_t kMaxEntrySize = 15;
explicit DictEntry(ByteSpan bytes)
: bytes_{}, // initialize bytes_ to all zeros
size_(bytes.size()) {
if (size_ > kMaxEntrySize) __builtin_trap();
memcpy(bytes_, bytes.data(), bytes.size());
}
const uint8_t *begin() const { return bytes_; }
const uint8_t *end() const { return bytes_ + size_; }
size_t size() const { return size_; }
bool operator<(const DictEntry &other) const {
return memcmp(this, &other, sizeof(*this)) < 0;
}
private:
// bytes_ must go first so that operator < is lexicographic.
uint8_t bytes_[kMaxEntrySize];
uint8_t size_; // between kMinEntrySize and kMaxEntrySize.
};
// Dictionary of CMP args.
// Maintains an easy-to-query set of pairs {A,B}, such that
// an instruction `A CMP B` has been observed.
class CmpDictionary {
public:
static constexpr size_t kMinEntrySize = 2; // 1-byte entries won't be added.
CmpDictionary() = default;
// Sets the dictionary from execution `metadata`.
//
// Returns false on bad metadata, true otherwise.
bool SetFromMetadata(const ExecutionMetadata &metadata);
// Clears `suggestions` on entry.
// For every observed `A CMP B` such that `A` is a prefix of `bytes`,
// adds `B` to `suggestions`.
// `suggestions`, is filled up to capacity(), but not more.
void SuggestReplacement(ByteSpan bytes,
std::vector<ByteSpan> &suggestions) const;
// Returns the number of dictionary entries.
size_t size() const { return dictionary_.size(); }
private:
using Pair = std::pair<DictEntry, DictEntry>;
std::vector<Pair> dictionary_;
};
// This class allows to mutate a ByteArray in different ways.
// All mutations expect and guarantee that `data` remains non-empty
// since there is only one possible empty input and it's uninteresting.
//
// This class is thread-compatible.
// Typical usage is to have one such object per thread.
class ByteArrayMutator {
public:
// CTOR. Initializes the internal RNG with `seed` (`seed` != 0).
// Keeps a const reference to `knobs` throughout the lifetime.
ByteArrayMutator(const Knobs &knobs, uintptr_t seed)
: rng_(seed), knobs_(knobs) {
if (seed == 0) __builtin_trap(); // We don't include logging.h here.
}
// Adds `dict_entries` to an internal dictionary.
void AddToDictionary(const std::vector<ByteArray> &dict_entries);
// Populates the internal CmpDictionary using execution `metadata`.
// Returns false on failure, true otherwise.
bool SetMetadata(const ExecutionMetadata &metadata) {
return cmp_dictionary_.SetFromMetadata(metadata);
}
// Takes non-empty `inputs`, produces `num_mutants` mutations in `mutants`.
// Old contents of `mutants` are discarded.
void MutateMany(const std::vector<MutationInputRef> &inputs,
size_t num_mutants, std::vector<ByteArray> &mutants);
using CrossOverFn = void (ByteArrayMutator::*)(ByteArray &,
const ByteArray &);
// Mutates `data` by inserting a random part from `other`.
void CrossOverInsert(ByteArray &data, const ByteArray &other);
// Mutates `data` by overwriting some of it with a random part of `other`.
void CrossOverOverwrite(ByteArray &data, const ByteArray &other);
// Applies one of {CrossOverOverwrite, CrossOverInsert}.
void CrossOver(ByteArray &data, const ByteArray &other);
// Type for a Mutator member-function.
// Every mutator function takes a ByteArray& as an input, mutates it in place
// and returns true if mutation took place. In some cases mutation may fail
// to happen, e.g. if EraseBytes() is called on a 1-byte input.
// Fn is test-only public.
using Fn = bool (ByteArrayMutator::*)(ByteArray &);
// All public functions below are mutators.
// They return true iff a mutation took place.
// Applies some random mutation to data.
bool Mutate(ByteArray &data);
// Applies some random mutation that doesn't change size.
bool MutateSameSize(ByteArray &data);
// Applies some random mutation that decreases size.
bool MutateDecreaseSize(ByteArray &data);
// Applies some random mutation that increases size.
bool MutateIncreaseSize(ByteArray &data);
// Flips a random bit.
bool FlipBit(ByteArray &data);
// Swaps two bytes.
bool SwapBytes(ByteArray &data);
// Changes a random byte to a random value.
bool ChangeByte(ByteArray &data);
// Overwrites a random part of `data` with a random dictionary entry.
bool OverwriteFromDictionary(ByteArray &data);
// Overwrites a random part of `data` with an entry suggested by the internal
// CmpDictionary.
bool OverwriteFromCmpDictionary(ByteArray &data);
// Inserts random bytes.
bool InsertBytes(ByteArray &data);
// Inserts a random dictionary entry at random position.
bool InsertFromDictionary(ByteArray &data);
// Erases random bytes.
bool EraseBytes(ByteArray &data);
// Set size alignment for mutants with modified sizes. Some mutators do not
// change input size, but mutators that insert or erase bytes will produce
// mutants with aligned sizes (if possible).
//
// Returns true if new size alignment was accepted. Returns false if max
// length is not a multiple of the specified size alignment.
bool set_size_alignment(size_t size_alignment) {
if ((max_len_ != std::numeric_limits<size_t>::max()) &&
(max_len_ % size_alignment != 0)) {
return false;
}
size_alignment_ = size_alignment;
return true;
}
// Set max length in bytes for mutants with modified sizes.
//
// Returns true if new max length was accepted. Returns false if specified max
// length is not a multiple of size alignment.
bool set_max_len(size_t max_len) {
if ((max_len != std::numeric_limits<size_t>::max()) &&
(max_len % size_alignment_ != 0)) {
return false;
}
max_len_ = max_len;
return true;
}
private:
FRIEND_TEST(ByteArrayMutator, RoundUpToAddCorrectly);
FRIEND_TEST(ByteArrayMutator, RoundDownToRemoveCorrectly);
// Given a current size and a number of bytes to add, returns the number of
// bytes that should be added for the resulting size to be properly aligned.
//
// If the original to_add would result in an unaligned input size, we round up
// to the next larger aligned size.
//
// This function respects `max_len_` and will return 0 if curr_size is already
// greater than or equal to `max_len_`.
size_t RoundUpToAdd(size_t curr_size, size_t to_add);
// Given a current size and a number of bytes to remove, returns the number of
// bytes that should be removed for the resulting size to be property aligned.
//
// If the original to_remove would result in an unaligned input size, we
// round down to the next smaller aligned size.
//
// However, we never return a number of bytes to remove that would result in a
// 0 size. In this case, the resulting size will be the smaller of
// curr_size and size_alignment_.
//
// This function respects `max_len_` and may return a larger number necessary
// to get the mutant's size to below `max_len_`.
size_t RoundDownToRemove(size_t curr_size, size_t to_remove);
// Size alignment in bytes to generate mutants.
//
// For example, if size_alignment_ is 1, generated mutants can have any
// number of bytes. If size_alignment_ is 4, generated mutants will have sizes
// that are 4-byte aligned.
size_t size_alignment_ = 1;
// Max length of a generated mutant in bytes.
size_t max_len_ = std::numeric_limits<size_t>::max();
Rng rng_;
const Knobs &knobs_;
std::vector<DictEntry> dictionary_;
CmpDictionary cmp_dictionary_;
};
} // namespace centipede
#endif // THIRD_PARTY_CENTIPEDE_BYTE_ARRAY_MUTATOR_H_