blob: 60a70907b2eee3b744b8efe1467572ddadc63adb [file] [log] [blame]
// Copyright 2022 The Centipede Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Function interceptors for Centipede.
#include <dlfcn.h> // for dlsym()
#include <pthread.h>
#include <cstdint>
#include <cstring>
#include "absl/base/nullability.h"
#include "absl/base/optimization.h"
#include "./centipede/sancov_state.h"
using fuzztest::internal::tls;
// Applied to the interceptors so that they are not sanitizer-instrumented:
// they may be called before or during sanitizer initialization. Instead of
// relying on the sanitizer being ready, we check whether the current thread
// has been marked as started by the runner, and use that as a proxy for
// sanitizer initialization having completed. If the thread is not marked, the
// interception logic is skipped.
#define NO_SANITIZE __attribute__((no_sanitize("all")))
namespace {
// Thin typed wrapper around dlsym().
// Looks up `function_name` with the RTLD_NEXT pseudo-handle and returns the
// resulting address converted to the function pointer type `FunctionT`.
// If dlsym() finds nothing, the returned pointer is null.
// The lookup for "foo" generally has to happen before foo() is first needed,
// i.e. either very early at startup (e.g. pre-init array) or lazily on the
// first call. This helper performs no caching of its own: every call performs
// a new dlsym() lookup, so callers should store the result.
template <typename FunctionT>
FunctionT FuncAddr(const char *function_name) {
  return reinterpret_cast<FunctionT>(dlsym(RTLD_NEXT, function_name));
}
// Bundles the 3rd and 4th arguments of pthread_create() so that both can be
// handed to a wrapper thread entry point through its single `void *` argument.
struct ThreadCreateArgs {
  // Signature of a pthread start routine.
  using StartRoutine = void *(*)(void *);

  StartRoutine start_routine;  // The caller's thread entry point.
  void *arg;                   // The argument to pass to `start_routine`.
};
// Wrapper for a `start_routine` argument of pthread_create().
// Calls the actual start_routine and returns its results.
// Performs custom actions before and after start_routine().
// `arg` is a `ThreadCreateArgs *` with the actual pthread_create() args.
void *MyThreadStart(void *absl_nonnull arg) {
auto *args_orig_ptr = static_cast<ThreadCreateArgs *>(arg);
auto args = *args_orig_ptr;
delete args_orig_ptr; // allocated in the pthread_create wrapper.
tls.OnThreadStart();
void *retval = args.start_routine(args.arg);
return retval;
}
// Collapses a *cmp result to its sign: -1, 0, or 1.
// The spec allows *cmp functions to return any negative or positive value, and
// real implementations do return varying magnitudes depending on factors
// outside our control. Because these values are later fed into the CMP
// instrumentation and turned into features, leaving them unnormalized could
// make tests flaky.
int NormalizeCmpResult(int result) {
  // Each comparison yields 0 or 1, so the difference is exactly the sign.
  return static_cast<int>(result > 0) - static_cast<int>(result < 0);
}
} // namespace
namespace fuzztest::internal {

// Deliberately empty function. It exists only as a symbol to be referenced
// from sancov_state.cc.
void SancovInterceptor() {
  // Intentionally a no-op.
}

}  // namespace fuzztest::internal
// A sanitizer-compatible way to intercept functions that are potentially
// intercepted by sanitizers, in which case the symbol __interceptor_X would be
// defined for intercepted function X. So we always forward an intercepted call
// to the sanitizer interceptor if it exists, and fall back to the next
// definition following dlsym.
//
// We define the X_orig pointers that are statically initialized to GetOrig_X()
// with the aforementioned logic to fill the pointers early, but they might
// still be too late. So the Centipede interceptors might need to handle the
// nullptr case and/or use REAL(X), which calls GetOrig_X() when needed. Also
// see compiler-rt/lib/interception/interception.h in the llvm-project source
// code.
//
// Note that since LLVM 17 it allows three interceptions (from the original
// binary, an external tool, and a sanitizer) to co-exist under a new scheme,
// while it is still compatible with the old way used here.
// Expands to the name `__interceptor_<orig_func_name>`.
#define SANITIZER_INTERCEPTOR_NAME(orig_func_name) \
__interceptor_##orig_func_name
// For a function `X` with return type `ret_type` and parenthesized parameter
// list `args`, this macro emits:
//  - a weak `extern "C"` declaration of `__interceptor_X`; being weak, its
//    address is null when no such symbol is linked in;
//  - `GetOrig_X()`, which returns the address of `__interceptor_X` when it is
//    non-null and otherwise the result of `FuncAddr("X")`, i.e. a
//    dlsym(RTLD_NEXT, ...) lookup;
//  - the static function pointer `X_orig`, which is null until assigned;
//  - `InitializeOrig_X()`, marked `__attribute__((constructor))`, which
//    stores `GetOrig_X()` into `X_orig`.
// NOTE: do not place `//` comments inside the macro body; the line
// continuations would make them swallow the rest of the definition.
#define DECLARE_CENTIPEDE_ORIG_FUNC(ret_type, orig_func_name, args) \
extern "C" __attribute__((weak)) ret_type( \
SANITIZER_INTERCEPTOR_NAME(orig_func_name)) args; \
static decltype(&SANITIZER_INTERCEPTOR_NAME(orig_func_name)) \
GetOrig_##orig_func_name() { \
if (auto p = &SANITIZER_INTERCEPTOR_NAME(orig_func_name)) return p; \
return FuncAddr<decltype(&SANITIZER_INTERCEPTOR_NAME(orig_func_name))>( \
#orig_func_name); \
} \
static ret_type(*orig_func_name##_orig) args; \
__attribute__((constructor)) void InitializeOrig_##orig_func_name() { \
orig_func_name##_orig = GetOrig_##orig_func_name(); \
}
// Evaluates to `X_orig` when it is non-null; otherwise performs the lookup on
// the spot via `GetOrig_X()`. The result of that on-the-spot lookup is not
// stored back into `X_orig`.
#define REAL(orig_func_name) \
(orig_func_name##_orig ? orig_func_name##_orig : GetOrig_##orig_func_name())
// Instantiate the "original function" machinery for every function that is
// intercepted in this file. Each invocation provides `X_orig`, `GetOrig_X()`
// and the `InitializeOrig_X()` constructor for the named function `X`; the
// parameter lists mirror the signatures of the interceptors defined below.
// Comparison functions:
DECLARE_CENTIPEDE_ORIG_FUNC(int, memcmp,
(const void *s1, const void *s2, size_t n));
DECLARE_CENTIPEDE_ORIG_FUNC(int, strcmp, (const char *s1, const char *s2));
DECLARE_CENTIPEDE_ORIG_FUNC(int, strncmp,
(const char *s1, const char *s2, size_t n));
// Thread creation:
DECLARE_CENTIPEDE_ORIG_FUNC(int, pthread_create,
(pthread_t * thread, const pthread_attr_t *attr,
void *(*start_routine)(void *), void *arg));
// Minimal byte-wise memcmp() used by the interceptors when the corresponding
// `*cmp_orig` pointer is still null.
// Expected to run only a handful of times at process startup, if at all.
// Compares the first `n` bytes of `s1` and `s2` as unsigned bytes and returns
// the difference of the first mismatching pair, or 0 if there is none.
static NO_SANITIZE int memcmp_fallback(const void *s1, const void *s2,
                                       size_t n) {
  const auto *lhs = static_cast<const uint8_t *>(s1);
  const auto *rhs = static_cast<const uint8_t *>(s2);
  for (; n != 0; --n, ++lhs, ++rhs) {
    if (*lhs != *rhs) return int{*lhs} - int{*rhs};
  }
  return 0;
}
// memcmp interceptor.
// Obtains the comparison result from the real memcmp() when `memcmp_orig` is
// set, or from memcmp_fallback() otherwise. If the current thread is not being
// traced, that raw result is returned as is. Otherwise the comparison is
// reported to tls.TraceMemCmp() and the result is returned normalized to one
// of {-1, 0, 1}.
extern "C" NO_SANITIZE int memcmp(const void *s1, const void *s2, size_t n) {
  int cmp;
  if (memcmp_orig) {
    cmp = memcmp_orig(s1, s2, n);
  } else {
    cmp = memcmp_fallback(s1, s2, n);
  }
  if (ABSL_PREDICT_FALSE(!tls.traced)) return cmp;

  // Address this interceptor will return to, i.e. the call site of memcmp().
  const auto caller_pc =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));
  const bool is_equal = cmp == 0;
  tls.TraceMemCmp(caller_pc, reinterpret_cast<const uint8_t *>(s1),
                  reinterpret_cast<const uint8_t *>(s2), n, is_equal);
  return NormalizeCmpResult(cmp);
}
// TODO(b/341111359): Investigate inefficiencies in the `strcmp`/`strncmp`
// interceptors and `TraceMemCmp`.
// strcmp interceptor.
// Obtains the comparison result from the real strcmp() when `strcmp_orig` is
// set, or from memcmp_fallback() otherwise. If the current thread is not being
// traced, that raw result is returned as is. Otherwise the comparison is
// reported to tls.TraceMemCmp() and the result is returned normalized to one
// of {-1, 0, 1}.
extern "C" NO_SANITIZE int strcmp(const char *s1, const char *s2) {
  // Number of leading positions at which neither string has ended, i.e. the
  // length of the shorter string. This bounds the bytes that are actually
  // compared, and is computed even when `strcmp_orig` is used because
  // TraceMemCmp() needs it.
  size_t shorter_len = 0;
  while (s1[shorter_len] != '\0' && s2[shorter_len] != '\0') {
    ++shorter_len;
  }

  int cmp;
  if (strcmp_orig) {
    cmp = strcmp_orig(s1, s2);
  } else {
    // The fallback must look one byte past the shorter string so that a
    // proper prefix compares as smaller, e.g. "foo" < "foobar".
    cmp = memcmp_fallback(s1, s2, shorter_len + 1);
  }
  if (ABSL_PREDICT_FALSE(!tls.traced)) return cmp;

  // Address this interceptor will return to, i.e. the call site of strcmp().
  const auto caller_pc =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));
  const bool is_equal = cmp == 0;
  // Report only `shorter_len` bytes so that the trailing '\0' does not end up
  // in the dictionary.
  tls.TraceMemCmp(caller_pc, reinterpret_cast<const uint8_t *>(s1),
                  reinterpret_cast<const uint8_t *>(s2), shorter_len,
                  is_equal);
  return NormalizeCmpResult(cmp);
}
// strncmp interceptor.
// Obtains the comparison result from the real strncmp() when `strncmp_orig`
// is set, or from memcmp_fallback() otherwise. If the current thread is not
// being traced, that raw result is returned as is. Otherwise the comparison is
// reported to tls.TraceMemCmp() and the result is returned normalized to one
// of {-1, 0, 1}.
extern "C" NO_SANITIZE int strncmp(const char *s1, const char *s2, size_t n) {
  // Number of leading positions, capped at `n`, at which neither string has
  // ended. This bounds the bytes that are actually compared, and is computed
  // even when `strncmp_orig` is used because TraceMemCmp() needs it.
  size_t shorter_len = 0;
  while (shorter_len < n && s1[shorter_len] != '\0' &&
         s2[shorter_len] != '\0') {
    ++shorter_len;
  }
  // When a string ends before `n` bytes, the terminating '\0' still takes
  // part in the comparison, so compare one byte more than `shorter_len`, but
  // never more than `n`.
  n = (n > shorter_len + 1) ? shorter_len + 1 : n;

  int cmp;
  if (strncmp_orig) {
    cmp = strncmp_orig(s1, s2, n);
  } else {
    cmp = memcmp_fallback(s1, s2, n);
  }
  if (ABSL_PREDICT_FALSE(!tls.traced)) return cmp;

  // Address this interceptor will return to, i.e. the call site of strncmp().
  const auto caller_pc =
      reinterpret_cast<uintptr_t>(__builtin_return_address(0));
  const bool is_equal = cmp == 0;
  // Report only `shorter_len` bytes so that the trailing '\0' does not end up
  // in the dictionary.
  tls.TraceMemCmp(caller_pc, reinterpret_cast<const uint8_t *>(s1),
                  reinterpret_cast<const uint8_t *>(s2), shorter_len,
                  is_equal);
  return NormalizeCmpResult(cmp);
}
// pthread_create interceptor.
// Calls real pthread_create, but wraps the start_routine() in MyThreadStart.
extern "C" int pthread_create(
pthread_t *absl_nonnull thread, // NOLINT
const pthread_attr_t *absl_nullable attr, // NOLINT
void *(*start_routine)(void *),
void *absl_nullable arg) { // NOLINT
if (ABSL_PREDICT_FALSE(!tls.started)) {
return REAL(pthread_create)(thread, attr, start_routine, arg);
}
// Wrap the arguments. Will be deleted in MyThreadStart.
auto *wrapped_args = new ThreadCreateArgs{start_routine, arg};
// Run the actual pthread_create.
return REAL(pthread_create)(thread, attr, MyThreadStart, wrapped_args);
}