diff --git a/crypto/test/CMakeLists.txt b/crypto/test/CMakeLists.txt
index 3e02c3c..0b1eab8 100644
--- a/crypto/test/CMakeLists.txt
+++ b/crypto/test/CMakeLists.txt
@@ -3,6 +3,7 @@
 
   OBJECT
 
+  abi_test.cc
   file_test.cc
   malloc.cc
   test_util.cc
diff --git a/crypto/test/abi_test.cc b/crypto/test/abi_test.cc
new file mode 100644
index 0000000..890aa15
--- /dev/null
+++ b/crypto/test/abi_test.cc
@@ -0,0 +1,62 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include "abi_test.h"
+
+#include <openssl/rand.h>
+
+
+namespace abi_test {
+namespace internal {
+
+std::string FixVAArgsString(const char *str) {
+  const std::string text(str);
+  const size_t comma = text.find(',');
+  if (comma == std::string::npos) {
+    return text + "()";  // No arguments were given: "f" becomes "f()".
+  }
+  // Drop the spaces on either side of the first comma.
+  size_t name_end = comma;
+  for (; name_end > 0 && text[name_end - 1] == ' '; name_end--) {
+  }
+  size_t args_begin = comma + 1;
+  for (; args_begin < text.size() && text[args_begin] == ' '; args_begin++) {
+  }
+  return text.substr(0, name_end) + "(" + text.substr(args_begin) + ")";
+}
+
+#if defined(SUPPORTS_ABI_TEST)
+crypto_word_t RunTrampoline(Result *out, crypto_word_t func,
+                            const crypto_word_t *argv, size_t argc) {
+  // Randomize the callee-saved register values so any clobber is visible.
+  CallerState expected;
+  RAND_bytes(reinterpret_cast<uint8_t *>(&expected), sizeof(expected));
+  // TODO(davidben): Use OS debugging APIs to single-step |func| and test that
+  // CFI and SEH annotations are correct.
+  CallerState actual = expected;
+  const crypto_word_t ret = abi_test_trampoline(func, &actual, argv, argc);
+
+  *out = Result();
+#define CALLER_STATE_REGISTER(type, name)             \
+  if (expected.name != actual.name) {                 \
+    out->errors.push_back(#name " was not restored"); \
+  }
+  LOOP_CALLER_STATE_REGISTERS()
+#undef CALLER_STATE_REGISTER
+  return ret;
+}
+#endif
+
+}  // namespace internal
+}  // namespace abi_test
diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h
new file mode 100644
index 0000000..ab9a729
--- /dev/null
+++ b/crypto/test/abi_test.h
@@ -0,0 +1,233 @@
+/* Copyright (c) 2018, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_ABI_TEST_H
+#define OPENSSL_HEADER_ABI_TEST_H
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include <openssl/base.h>
+
+#include "../internal.h"
+
+
+// abi_test provides routines for verifying that functions satisfy platform ABI
+// requirements.
+namespace abi_test {
+
+// Result collects the ABI errors found by one test; |ok| means there were none.
+struct Result {
+  std::vector<std::string> errors;
+
+  bool ok() const { return errors.empty(); }
+};
+
+namespace internal {
+
+// DeductionGuard wraps |T| in a template, so that template argument deduction
+// does not apply to it. This may be used to force C++ to deduce template
+// arguments from another parameter.
+template <typename T>
+struct DeductionGuard {
+  using Type = T;
+};
+
+// Reg128 is 16-byte-aligned storage for the contents of a 128-bit register.
+struct alignas(16) Reg128 {
+  uint64_t lo, hi;
+  bool operator==(const Reg128 &o) const { return lo == o.lo && hi == o.hi; }
+  bool operator!=(const Reg128 &o) const { return !operator==(o); }
+};
+
+// LOOP_CALLER_STATE_REGISTERS is a macro that iterates over all registers the
+// callee is expected to save for the caller.
+//
+// TODO(davidben): Add support for other architectures.
+#if defined(OPENSSL_X86_64)
+#if defined(OPENSSL_WINDOWS)
+// See https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017#register-usage
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(uint64_t, rbx) \
+  CALLER_STATE_REGISTER(uint64_t, rbp) \
+  CALLER_STATE_REGISTER(uint64_t, rdi) \
+  CALLER_STATE_REGISTER(uint64_t, rsi) \
+  CALLER_STATE_REGISTER(uint64_t, r12) \
+  CALLER_STATE_REGISTER(uint64_t, r13) \
+  CALLER_STATE_REGISTER(uint64_t, r14) \
+  CALLER_STATE_REGISTER(uint64_t, r15) \
+  CALLER_STATE_REGISTER(Reg128, xmm6)  \
+  CALLER_STATE_REGISTER(Reg128, xmm7)  \
+  CALLER_STATE_REGISTER(Reg128, xmm8)  \
+  CALLER_STATE_REGISTER(Reg128, xmm9)  \
+  CALLER_STATE_REGISTER(Reg128, xmm10) \
+  CALLER_STATE_REGISTER(Reg128, xmm11) \
+  CALLER_STATE_REGISTER(Reg128, xmm12) \
+  CALLER_STATE_REGISTER(Reg128, xmm13) \
+  CALLER_STATE_REGISTER(Reg128, xmm14) \
+  CALLER_STATE_REGISTER(Reg128, xmm15)
+#else
+// See https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+#define LOOP_CALLER_STATE_REGISTERS()  \
+  CALLER_STATE_REGISTER(uint64_t, rbx) \
+  CALLER_STATE_REGISTER(uint64_t, rbp) \
+  CALLER_STATE_REGISTER(uint64_t, r12) \
+  CALLER_STATE_REGISTER(uint64_t, r13) \
+  CALLER_STATE_REGISTER(uint64_t, r14) \
+  CALLER_STATE_REGISTER(uint64_t, r15)
+#endif  // OPENSSL_WINDOWS
+#endif  // OPENSSL_X86_64
+
+// Enable ABI testing if all of the following are true.
+//
+// - We have CallerState and trampoline support for the architecture.
+//
+// - Assembly is enabled.
+//
+// - This is not a shared library build. Assembly functions are not reachable
+//   from tests in shared library builds.
+//
+// - This is a debug build. We can instrument release builds as well, but this
+//   ensures we have coverage for both instrumented and uninstrumented code.
+//   See the comment in |CHECK_ABI|. Note ABI testing is only meaningful for
+//   assembly, which is not affected by compiler optimizations.
+#if defined(LOOP_CALLER_STATE_REGISTERS) && !defined(OPENSSL_NO_ASM) && \
+    !defined(BORINGSSL_SHARED_LIBRARY) && !defined(NDEBUG)
+#define SUPPORTS_ABI_TEST
+
+// CallerState contains all caller state that the callee is expected to
+// preserve.
+struct CallerState {
+#define CALLER_STATE_REGISTER(type, name) type name;
+  LOOP_CALLER_STATE_REGISTERS()
+#undef CALLER_STATE_REGISTER
+};
+
+// RunTrampoline runs |func| on |argv|, recording ABI errors in |out|. It does
+// not perform any type-checking.
+crypto_word_t RunTrampoline(Result *out, crypto_word_t func,
+                            const crypto_word_t *argv, size_t argc);
+
+// CheckImpl runs |func| on |args|, recording ABI errors in |out|.
+//
+// It returns the value as a |crypto_word_t| to work around problems when |R| is
+// void. |args| is wrapped in a |DeductionGuard| so |func| determines the
+// template arguments. Otherwise, |args| may deduce |Args| incorrectly. For
+// instance, if |func| takes const int *, and the caller passes an int *, the
+// compiler will complain the deduced types do not match.
+template <typename R, typename... Args>
+inline crypto_word_t CheckImpl(Result *out, R (*func)(Args...),
+                               typename DeductionGuard<Args>::Type... args) {
+  static_assert(sizeof...(args) <= 10,
+                "too many arguments for abi_test_trampoline");
+
+  // Allocate one extra entry so MSVC does not complain about zero-size arrays.
+  crypto_word_t argv[sizeof...(args) + 1] = {
+      (crypto_word_t)args...,
+  };
+  return RunTrampoline(out, reinterpret_cast<crypto_word_t>(func), argv,
+                       sizeof...(args));
+}
+#else
+// To simplify callers when ABI testing support is unavailable, provide a
+// backup CheckImpl that calls |func| directly. The result is cast explicitly
+// so pointer returns compile too. Void returns use a separate overload.
+template <typename R, typename... Args>
+inline typename std::enable_if<!std::is_void<R>::value, crypto_word_t>::type
+CheckImpl(Result *out, R (*func)(Args...),
+          typename DeductionGuard<Args>::Type... args) {
+  *out = Result();
+  return (crypto_word_t)func(args...);
+}
+
+template <typename... Args>
+inline crypto_word_t CheckImpl(Result *out, void (*func)(Args...),
+                               typename DeductionGuard<Args>::Type... args) {
+  *out = Result();
+  func(args...);
+  return 0;
+}
+#endif  // SUPPORTS_ABI_TEST
+
+// FixVAArgsString takes a string like "f, 1, 2" and returns a string like
+// "f(1, 2)".
+//
+// This is needed because the |CHECK_ABI| macro below cannot be defined as
+// CHECK_ABI(func, ...). The C specification requires that variadic macros bind
+// at least one variadic argument. Clang, GCC, and MSVC all ignore this, but
+// there are issues with trailing commas and different behaviors across
+// compilers.
+std::string FixVAArgsString(const char *str);
+
+// CheckGTest is |CheckImpl| with a typed return and GTest failure reporting.
+template <typename R, typename... Args>
+inline R CheckGTest(const char *va_args_str, const char *file, int line,
+                    R (*func)(Args...),
+                    typename DeductionGuard<Args>::Type... args) {
+  Result abi;
+  const crypto_word_t word = CheckImpl(&abi, func, args...);
+  if (abi.ok()) {
+    return (R)word;  // Nothing to report.
+  }
+  testing::Message report;
+  report << "ABI failures in " << FixVAArgsString(va_args_str) << ":\n";
+  for (const std::string &err : abi.errors) {
+    report << "    " << err << "\n";
+  }
+  ADD_FAILURE_AT(file, line) << report;
+  return (R)word;
+}
+
+}  // namespace internal
+
+// Check runs |func| on |args| and returns the result. If ABI-testing is
+// supported in this build configuration, it writes any ABI failures to |out|.
+// Otherwise, it runs the function transparently.
+template <typename R, typename... Args>
+inline R Check(Result *out, R (*func)(Args...),
+               typename internal::DeductionGuard<Args>::Type... args) {
+  return (R)internal::CheckImpl(out, func, args...);
+}
+
+}  // namespace abi_test
+
+// CHECK_ABI calls the first argument on the remaining arguments and returns the
+// result. If ABI-testing is supported in this build configuration, it adds a
+// non-fatal GTest failure if the call did not satisfy ABI requirements.
+//
+// |CHECK_ABI| does return the value and thus may replace any function call,
+// provided it takes only simple parameters. It is recommended to integrate it
+// into functional tests of assembly. To ensure coverage of both instrumented
+// and uninstrumented calls, ABI testing is disabled in release-mode tests.
+#define CHECK_ABI(...) \
+  abi_test::internal::CheckGTest(#__VA_ARGS__, __FILE__, __LINE__, __VA_ARGS__)
+
+
+// Internal functions.
+
+#if defined(SUPPORTS_ABI_TEST)
+// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+// with |argv|, then saves the callee-saved registers into |state|. It returns
+// the result of |func|. We give |func| type |crypto_word_t| to avoid tripping
+// MSVC's warning 4191.
+extern "C" crypto_word_t abi_test_trampoline(
+    crypto_word_t func, abi_test::internal::CallerState *state,
+    const crypto_word_t *argv, size_t argc);
+#endif  // SUPPORTS_ABI_TEST
+
+
+#endif  // OPENSSL_HEADER_ABI_TEST_H
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
new file mode 100755
index 0000000..b1f9b93
--- /dev/null
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -0,0 +1,267 @@
+#!/usr/bin/env perl
+# Copyright (c) 2018, Google Inc.
+#
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+# This file defines helper functions for crypto/test/abi_test.h on x86_64. See
+# that header for details on how to use this.
+#
+# For convenience, this file is linked into libcrypto, where consuming builds
+# already support architecture-specific sources. The static linker should drop
+# this code in non-test binaries. This includes a shared library build of
+# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
+# used.
+#
+# References:
+#
+# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
+# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017
+
+use strict;
+
+my $flavour = shift;
+my $output  = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+my $win64 = 0;
+$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/;
+my $dir = $1;
+my $xlate;
+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+die "can't locate x86_64-xlate.pl";
+
+open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\"";
+*STDOUT = *OUT;
+
+# @inp is the registers used for function inputs, in order.
+my @inp = $win64 ? ("%rcx", "%rdx", "%r8", "%r9") :
+                   ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");
+
+# @caller_state is the list of registers that the callee must preserve for the
+# caller. This must match the definition of CallerState in abi_test.h.
+my @caller_state = ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");
+if ($win64) {
+  @caller_state = ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14",
+                   "%r15", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10",
+                   "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15");
+}
+
+# $caller_state_size is the size of CallerState, in bytes.
+my $caller_state_size = 0;
+foreach (@caller_state) {
+  if (/^%r/) {
+    $caller_state_size += 8;
+  } elsif (/^%xmm/) {
+    $caller_state_size += 16;
+  } else {
+    die "unknown register $_";
+  }
+}
+
+# load_caller_state returns assembly which loads the CallerState structure
+# stored at $off($reg) into the corresponding registers. It touches no other
+# registers, so $reg must not itself be a CallerState register. $cb, if
+# defined, is called after each movq or movdqa with the offset (relative to
+# $reg) and name of the register; its return value is appended to the output.
+sub load_caller_state {
+  my ($off, $reg, $cb) = @_;
+  my $asm = "";
+  foreach my $name (@caller_state) {
+    my ($insn, $width);
+    if ($name =~ /^%r/) {
+      ($insn, $width) = ("movq", 8);
+    } elsif ($name =~ /^%xmm/) {
+      ($insn, $width) = ("movdqa", 16);
+    } else {
+      die "unknown register $name";
+    }
+    $asm .= "\t$insn\t$off($reg), $name\n";
+    $asm .= $cb->($off, $name) if (defined($cb));
+    $off += $width;
+  }
+  return $asm;
+}
+
+# store_caller_state takes the same arguments as load_caller_state, but the
+# returned assembly writes the registers' current values to $off($reg).
+sub store_caller_state {
+  my ($off, $reg, $cb) = @_;
+  my $asm = "";
+  foreach my $name (@caller_state) {
+    my ($insn, $width);
+    if ($name =~ /^%r/) {
+      ($insn, $width) = ("movq", 8);
+    } elsif ($name =~ /^%xmm/) {
+      ($insn, $width) = ("movdqa", 16);
+    } else {
+      die "unknown register $name";
+    }
+    $asm .= "\t$insn\t$name, $off($reg)\n";
+    $asm .= $cb->($off, $name) if (defined($cb));
+    $off += $width;
+  }
+  return $asm;
+}
+
+# $max_params is the maximum number of parameters abi_test_trampoline supports.
+my $max_params = 10;
+
+# Windows reserves stack space for the register-based parameters, while SysV
+# only reserves space for the overflow ones.
+my $stack_params_skip = $win64 ? scalar(@inp) : 0;
+my $num_stack_params = $win64 ? $max_params : $max_params - scalar(@inp);
+
+my ($func, $state, $argv, $argc) = @inp;
+my $code = <<____;
+.text
+
+# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+# with |argv|, then saves the callee-saved registers into |state|. It returns
+# the result of |func|.
+# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+#                              const uint64_t *argv, size_t argc);
+.type	abi_test_trampoline, \@abi-omnipotent
+.globl	abi_test_trampoline
+.align	16
+abi_test_trampoline:
+.cfi_startproc
+	# Stack layout:
+	#   8 bytes - align
+	#   $caller_state_size bytes - saved caller registers
+	#   8 bytes - scratch space
+	#   8 bytes - saved copy of \$state
+	#   8 bytes - saved copy of \$func
+	#   8 bytes - if needed for stack alignment
+	#   8*$num_stack_params bytes - parameters for \$func
+____
+my $stack_alloc_size = 8 + $caller_state_size + 8*3 + 8*$num_stack_params;
+# SysV and Windows both require the stack to be 16-byte-aligned. The call
+# instruction offsets it by 8, so stack allocations must be 8 mod 16.
+if ($stack_alloc_size % 16 != 8) {
+  $num_stack_params++;
+  $stack_alloc_size += 8;
+}
+my $stack_params_offset = 8 * $stack_params_skip;
+my $func_offset = 8 * $num_stack_params;
+my $state_offset = $func_offset + 8;
+my $scratch_offset = $state_offset + 8;
+my $caller_state_offset = $scratch_offset + 8;
+$code .= <<____;
+	subq	\$$stack_alloc_size, %rsp
+.cfi_adjust_cfa_offset	$stack_alloc_size
+____
+# Store our caller's state. This is needed because we modify it ourselves, and
+# also to isolate the test infrastructure from the function under test failing
+# to save some register.
+$code .= store_caller_state($caller_state_offset, "%rsp", sub {
+  my ($off, $reg) = @_;
+  $reg = substr($reg, 1);
+  $off -= $stack_alloc_size + 8;
+  return ".cfi_offset\t$reg, $off\n";
+});
+
+$code .= load_caller_state(0, $state);
+$code .= <<____;
+	# Stash \$func and \$state, so they are available after the call returns.
+	movq	$func, $func_offset(%rsp)
+	movq	$state, $state_offset(%rsp)
+
+	# Load parameters. Note this will clobber \$argv and \$argc, so we can
+	# only use non-parameter volatile registers. There are three, and they
+	# are the same between SysV and Win64: %rax, %r10, and %r11.
+	movq	$argv, %r10
+	movq	$argc, %r11
+____
+foreach (@inp) {
+	$code .= <<____;
+	dec	%r11
+	js	.Lcall
+	movq	(%r10), $_
+	addq	\$8, %r10
+____
+}
+$code .= <<____;
+	leaq	$stack_params_offset(%rsp), %rax
+.Largs_loop:
+	dec	%r11
+	js	.Lcall
+
+  # This block should be:
+  #    movq (%r10), %rtmp
+  #    movq %rtmp, (%rax)
+  # There are no spare registers available, so we spill into the scratch space.
+	movq	%r11, $scratch_offset(%rsp)
+	movq	(%r10), %r11
+	movq	%r11, (%rax)
+	movq	$scratch_offset(%rsp), %r11
+
+	addq	\$8, %r10
+	addq	\$8, %rax
+	jmp	.Largs_loop
+
+.Lcall:
+	movq	$func_offset(%rsp), %rax
+	call	*%rax
+
+	# Store what \$func did to our state, so our caller can check.
+  movq  $state_offset(%rsp), $state
+____
+$code .= store_caller_state(0, $state);
+
+# Restore our caller's state.
+$code .= load_caller_state($caller_state_offset, "%rsp", sub {
+  my ($off, $reg) = @_;
+  $reg = substr($reg, 1);
+  return ".cfi_restore\t$reg\n";
+});
+$code .= <<____;
+	addq	\$$stack_alloc_size, %rsp
+.cfi_adjust_cfa_offset	-$stack_alloc_size
+
+  # %rax already contains \$func's return value, unmodified.
+	ret
+.cfi_endproc
+.size	abi_test_trampoline,.-abi_test_trampoline
+____
+
+# abi_test_clobber_* zeros the corresponding register. These are used to test
+# the ABI-testing framework.
+foreach ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15) {
+  $code .= <<____;
+.type	abi_test_clobber_r$_, \@abi-omnipotent
+.globl	abi_test_clobber_r$_
+.align	16
+abi_test_clobber_r$_:
+	xorq	%r$_, %r$_
+	ret
+.size	abi_test_clobber_r$_,.-abi_test_clobber_r$_
+____
+}
+
+foreach (0..15) {
+  $code .= <<____;
+.type	abi_test_clobber_xmm$_, \@abi-omnipotent
+.globl	abi_test_clobber_xmm$_
+.align	16
+abi_test_clobber_xmm$_:
+	pxor	%xmm$_, %xmm$_
+	ret
+.size	abi_test_clobber_xmm$_,.-abi_test_clobber_xmm$_
+____
+}
+
+print $code;
+close STDOUT;
diff --git a/crypto/test/gtest_main.cc b/crypto/test/gtest_main.cc
index a557168..f19b830 100644
--- a/crypto/test/gtest_main.cc
+++ b/crypto/test/gtest_main.cc
@@ -20,6 +20,7 @@
 #include <openssl/cpu.h>
 #include <openssl/rand.h>
 
+#include "abi_test.h"
 #include "gtest_main.h"
 #include "../internal.h"
 
@@ -71,5 +72,17 @@
   }
 #endif  // TEST_ARM_CPUS
 
-  return RUN_ALL_TESTS();
+  // Run the entire test suite under an ABI check. This is less effective than
+  // testing the individual assembly functions, but will catch issues with
+  // rarely-used registers.
+  abi_test::Result abi;
+  int ret = abi_test::Check(&abi, RUN_ALL_TESTS);
+  if (!abi.ok()) {
+    fprintf(stderr, "ABI failure in test suite:\n");
+    for (const auto &error : abi.errors) {
+      fprintf(stderr, "    %s\n", error.c_str());
+    }
+    exit(1);
+  }
+  return ret;
 }
