Add initial support for SEH directives in x86_64 perlasm

This implements similar directives as MASM, so we do not need to build
all the structures by hand. It does not provide any help to abstract
between Win64 and SysV differences, however.

This is pulled together from some old draft CLs I had, one of which
actually synthesized CFI directives from SEH, so it should be possible.
I've intentionally omitted that however, as it also brings in questions
about how to handle the calling convention differences (the existing
machinery won't *quite* work). I've uploaded just this for now, so
review can focus on the basic mechanism.

I've also preserved perlasm's weird mixed tabs and spaces indentation
convention for now, though it is a bit tedious.

Bug: 259
Change-Id: Ib3f46a27751a5319b758d12c462c660cf9f3e632
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/56126
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
index f736473..0dcf8ed 100644
--- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
@@ -103,16 +103,15 @@
 .align	16
 gcm_gmult_ssse3:
 .cfi_startproc
-.Lgmult_seh_begin:
+.seh_startproc
 ____
 $code .= <<____ if ($win64);
 	subq	\$40, %rsp
-.Lgmult_seh_allocstack:
+.seh_allocstack	40
 	movdqa	%xmm6, (%rsp)
-.Lgmult_seh_save_xmm6:
+.seh_savexmm128	%xmm6, 0
 	movdqa	%xmm10, 16(%rsp)
-.Lgmult_seh_save_xmm10:
-.Lgmult_seh_prolog_end:
+.seh_savexmm128	%xmm10, 16
 ____
 $code .= <<____;
 	movdqu	($Xi), %xmm0
@@ -230,8 +229,8 @@
 ____
 $code .= <<____;
 	ret
-.Lgmult_seh_end:
 .cfi_endproc
+.seh_endproc
 .size	gcm_gmult_ssse3,.-gcm_gmult_ssse3
 ____
 
@@ -245,19 +244,18 @@
 .globl	gcm_ghash_ssse3
 .align	16
 gcm_ghash_ssse3:
-.Lghash_seh_begin:
 .cfi_startproc
+.seh_startproc
 ____
 $code .= <<____ if ($win64);
 	subq	\$56, %rsp
-.Lghash_seh_allocstack:
+.seh_allocstack	56
 	movdqa	%xmm6, (%rsp)
-.Lghash_seh_save_xmm6:
+.seh_savexmm128	%xmm6, 0
 	movdqa	%xmm10, 16(%rsp)
-.Lghash_seh_save_xmm10:
+.seh_savexmm128	%xmm10, 16
 	movdqa	%xmm11, 32(%rsp)
-.Lghash_seh_save_xmm11:
-.Lghash_seh_prolog_end:
+.seh_savexmm128	%xmm11, 32
 ____
 $code .= <<____;
 	movdqu	($Xi), %xmm0
@@ -329,8 +327,8 @@
 ____
 $code .= <<____;
 	ret
-.Lghash_seh_end:
 .cfi_endproc
+.seh_endproc
 .size	gcm_ghash_ssse3,.-gcm_ghash_ssse3
 
 .align	16
@@ -343,71 +341,5 @@
 .quad	0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
 ____
 
-if ($win64) {
-  # Add unwind metadata for SEH.
-  #
-  # TODO(davidben): This is all manual right now. Once we've added SEH tests,
-  # add support for emitting these in x86_64-xlate.pl, probably based on MASM
-  # and Yasm's unwind directives, and unify with CFI. Then upstream it to
-  # replace the error-prone and non-standard custom handlers.
-
-  # See https://docs.microsoft.com/en-us/cpp/build/struct-unwind-code?view=vs-2017
-  my $UWOP_ALLOC_SMALL = 2;
-  my $UWOP_SAVE_XMM128 = 8;
-
-  $code .= <<____;
-.section	.pdata
-.align	4
-	.rva	.Lgmult_seh_begin
-	.rva	.Lgmult_seh_end
-	.rva	.Lgmult_seh_info
-
-	.rva	.Lghash_seh_begin
-	.rva	.Lghash_seh_end
-	.rva	.Lghash_seh_info
-
-.section	.xdata
-.align	8
-.Lgmult_seh_info:
-	.byte	1	# version 1, no flags
-	.byte	.Lgmult_seh_prolog_end-.Lgmult_seh_begin
-	.byte	5	# num_slots = 1 + 2 + 2
-	.byte	0	# no frame register
-
-	.byte	.Lgmult_seh_save_xmm10-.Lgmult_seh_begin
-	.byte	@{[$UWOP_SAVE_XMM128 | (10 << 4)]}
-	.value	1
-
-	.byte	.Lgmult_seh_save_xmm6-.Lgmult_seh_begin
-	.byte	@{[$UWOP_SAVE_XMM128 | (6 << 4)]}
-	.value	0
-
-	.byte	.Lgmult_seh_allocstack-.Lgmult_seh_begin
-	.byte	@{[$UWOP_ALLOC_SMALL | (((40 - 8) / 8) << 4)]}
-
-.align	8
-.Lghash_seh_info:
-	.byte	1	# version 1, no flags
-	.byte	.Lghash_seh_prolog_end-.Lghash_seh_begin
-	.byte	7	# num_slots = 1 + 2 + 2 + 2
-	.byte	0	# no frame register
-
-	.byte	.Lghash_seh_save_xmm11-.Lghash_seh_begin
-	.byte	@{[$UWOP_SAVE_XMM128 | (11 << 4)]}
-	.value	2
-
-	.byte	.Lghash_seh_save_xmm10-.Lghash_seh_begin
-	.byte	@{[$UWOP_SAVE_XMM128 | (10 << 4)]}
-	.value	1
-
-	.byte	.Lghash_seh_save_xmm6-.Lghash_seh_begin
-	.byte	@{[$UWOP_SAVE_XMM128 | (6 << 4)]}
-	.value	0
-
-	.byte	.Lghash_seh_allocstack-.Lghash_seh_begin
-	.byte	@{[$UWOP_ALLOC_SMALL | (((56 - 8) / 8) << 4)]}
-____
-}
-
 print $code;
 close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 0f57463..f906828 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -59,6 +59,10 @@
 # 9. .init segment is allowed to contain calls to functions only.
 # a. If function accepts more than 4 arguments *and* >4th argument
 #    is declared as non 64-bit value, do clear its upper part.
+#
+# TODO(https://crbug.com/boringssl/259): The dual-ABI mechanism described here
+# does not quite unwind correctly on Windows. The seh_directive logic below has
+# the start of a new mechanism.
 
 
 use strict;
@@ -710,6 +714,320 @@
 	return ($elf ? $self->{value} : undef);
     }
 }
+{ package seh_directive;
+    # This implements directives, like MASM's, for specifying Windows unwind
+    # codes. See https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170
+    # for details on the Windows unwind mechanism. Unlike MASM's directives, we
+    # have no .seh_endprolog directive. Instead, the last prolog directive is
+    # implicitly the end of the prolog.
+    #
+    # TODO(https://crbug.com/boringssl/259): For now, SEH directives are ignored
+    # on non-Windows platforms. This means functions need to specify both CFI
+    # and SEH directives, often redundantly. Ideally we'd abstract between the
+    # two. E.g., we can synthesize CFI from SEH prologs, but SEH does not
+    # annotate epilogs, so we'd need to combine parts from both. Or we can
+    # restrict ourselves to a subset of CFI and synthesize SEH from CFI.
+    #
+    # Additionally, this only supports @abi-omnipotent functions. It is
+    # incompatible with the automatic calling convention conversion. The main
+    # complication is the current scheme modifies RDI and RSI (non-volatile on
+    # Windows) at the start of the function, and saves them in the parameter
+    # stack area. This can be expressed with .seh_savereg, but .seh_savereg is
+    # only usable late in the prolog. However, unwind information gives enough
+    # information to locate the parameter stack area at any point in the
+    # function, so we can defer conversion or implement other schemes.
+
+    my $UWOP_PUSH_NONVOL = 0;
+    my $UWOP_ALLOC_LARGE = 1;
+    my $UWOP_ALLOC_SMALL = 2;
+    my $UWOP_SET_FPREG = 3;
+    my $UWOP_SAVE_NONVOL = 4;
+    my $UWOP_SAVE_NONVOL_FAR = 5;
+    my $UWOP_SAVE_XMM128 = 8;
+    my $UWOP_SAVE_XMM128_FAR = 9;
+
+    my %UWOP_REG_TO_NUMBER = ("%rax" => 0, "%rcx" => 1, "%rdx" => 2, "%rbx" => 3,
+			      "%rsp" => 4, "%rbp" => 5, "%rsi" => 6, "%rdi" => 7,
+			      map(("%r$_" => $_), (8..15)));
+    my %UWOP_NUMBER_TO_REG = reverse %UWOP_REG_TO_NUMBER;
+
+    # The contents of the pdata and xdata sections so far.
+    my ($xdata, $pdata) = ("", "");
+
+    my %info;
+
+    my $next_label = 0;
+    my $current_label_func = "";
+
+    # _new_unwind_label allocates a new label, unique to the file.
+    sub _new_unwind_label {
+	my ($name) = (@_);	# $name is the label kind embedded in the result
+	# Labels only need to be unique, but to make diffs easier to read, scope
+	# them all under the current function.
+	my $func = $current_function->{name};
+	if ($func ne $current_label_func) {
+	    $current_label_func = $func;
+	    $next_label = 0;	# numbering restarts whenever the function changes
+	}
+
+	my $num = $next_label++;	# post-increment: the first label is number 0
+	return ".LSEH_${name}_${func}_${num}";
+    }
+
+    sub _check_in_proc {
+	die "Missing .seh_startproc directive" unless %info;
+    }
+
+    sub _check_not_in_proc {
+	die "Missing .seh_endproc directive" if %info;
+    }
+
+    sub _startproc {
+	_check_not_in_proc();
+	if ($current_function->{abi} eq "svr4") {
+	    die "SEH directives can only be used with \@abi-omnipotent";
+	}
+
+	my $info_label = _new_unwind_label("info");
+	my $start_label = _new_unwind_label("begin");
+	%info = (
+	    # info_label is the label of the function's entry in .xdata.
+	    info_label => $info_label,
+	    # start_label is the start of the function.
+	    start_label => $start_label,
+	    # endprolog is the label of the last unwind code in the function.
+	    endprolog => $start_label,
+	    # unwind_codes contains the textual representation of the
+	    # unwind codes in the function so far.
+	    unwind_codes => "",
+	    # num_codes is the number of 16-bit words in unwind_codes.
+	    num_codes => 0,
+	    # frame_reg is the number of the frame register, or zero if
+	    # there is none.
+	    frame_reg => 0,
+	    # frame_offset is the offset into the fixed part of the stack that
+	    # the frame register points into.
+	    frame_offset => 0,
+	    # has_offset is whether directives taking an offset have
+	    # been used. This is used to check that such directives
+	    # come after the fixed portion of the stack frame is established.
+	    has_offset => 0,
+	    # has_nonpushreg is whether directives other than
+	    # .seh_pushreg have been used. This is used to check that
+	    # .seh_pushreg directives are first.
+	    has_nonpushreg => 0,
+	);
+	return $start_label;
+    }
+
+    sub _add_unwind_code {
+	my ($op, $value, @extra) = @_;
+	_check_in_proc();
+	if ($op != $UWOP_PUSH_NONVOL) {
+	    $info{has_nonpushreg} = 1;
+	} elsif ($info{has_nonpushreg}) {
+	    die ".seh_pushreg directives must appear first in the prolog";
+	}
+
+	my $label = _new_unwind_label("prolog");
+	# Encode an UNWIND_CODE structure. See
+	# https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#struct-unwind_code
+	my $encoded = $op | ($value << 4);
+	my $codes = <<____;
+	.byte	$label-$info{start_label}
+	.byte	$encoded
+____
+	# Some opcodes need additional values to encode themselves.
+	foreach (@extra) {
+	    $codes .= "\t.value\t$_\n";
+	}
+
+	$info{num_codes} += 1 + scalar(@extra);
+	# Unwind codes are listed in reverse order.
+	$info{unwind_codes} = $codes . $info{unwind_codes};
+	# Track the label of the last unwind code. It implicitly is the end of
+	# the prolog. MASM has an endprolog directive, but it seems to be
+	# unnecessary.
+	$info{endprolog} = $label;
+	return $label;
+    }
+
+    sub _updating_fixed_allocation {
+	_check_in_proc();
+	if ($info{frame_reg} != 0) {
+	    # Windows documentation does not explicitly forbid .seh_allocstack
+	    # after .seh_setframe, but it appears to have no effect. Offsets are
+	    # still relative to the fixed allocation when the frame register was
+	    # established.
+	    die "fixed allocation may not be increased after .seh_setframe";
+	}
+	if ($info{has_offset}) {
+	    # Windows documentation does not explicitly forbid .seh_savereg
+	    # before .seh_allocstack, but it does not work very well. Offsets
+	    # are relative to the top of the final fixed allocation, not where
+	    # RSP currently is.
+	    die "directives with an offset must come after the fixed allocation is established.";
+	}
+    }
+
+    sub _endproc {	# Records the current function's .pdata/.xdata; returns its end label.
+	_check_in_proc();
+	if ($info{num_codes} == 0) {
+	    # If a Windows function has no directives (i.e. it doesn't touch the
+	    # stack), it is a leaf function and is not expected to appear in
+	    # .pdata or .xdata.
+	    die ".seh_endproc found with no unwind codes";
+	}
+
+	my $end_label = _new_unwind_label("end");
+	# Encode a RUNTIME_FUNCTION. See
+	# https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#struct-runtime_function
+	$pdata .= <<____;
+	.rva	$info{start_label}
+	.rva	$end_label
+	.rva	$info{info_label}
+
+____
+
+	# Encode an UNWIND_INFO; each one is 4-byte aligned as it must be. See
+	# https://learn.microsoft.com/en-us/cpp/build/exception-handling-x64?view=msvc-170#struct-unwind_info
+	my $frame_encoded = $info{frame_reg} | (($info{frame_offset} / 16) << 4);
+	$xdata .= ".align\t4\n" . <<____;
+$info{info_label}:
+	.byte	1	# version 1, no flags
+	.byte	$info{endprolog}-$info{start_label}
+	.byte	$info{num_codes}
+	.byte	$frame_encoded
+$info{unwind_codes}
+____
+
+	%info = ();	# leave "in proc" state so the next .seh_startproc is accepted
+	return $end_label;
+    }
+
+    sub re {	# Consumes a .seh_* directive from $$line, if any, and returns the label replacing it.
+	my ($class, $line) = @_;
+	if ($$line =~ s/^\s*\.seh_(\w+)\s*//) {
+	    my $dir = $1;
+	    if (!$win64) {	# SEH directives are dropped on non-Windows targets
+		$$line = "";
+		return;
+	    }
+
+	    my $label;
+	    SWITCH: for ($dir) {
+		/^startproc$/ && do {
+		    $label = _startproc();
+		    last;
+		};
+		/^pushreg$/ && do {
+		    $$line =~ /^(%\w+)\s*$/ or die "could not parse .seh_$dir";
+		    my $reg_num = $UWOP_REG_TO_NUMBER{$1} // die "unknown register $1";	# %rax is 0, so test definedness
+		    _updating_fixed_allocation();
+		    $label = _add_unwind_code($UWOP_PUSH_NONVOL, $reg_num);
+		    last;
+		};
+		/^allocstack$/ && do {
+		    my $num = eval($$line);
+		    if ($num <= 0 || $num % 8 != 0) {
+			die "invalid stack allocation: $num";
+		    }
+		    _updating_fixed_allocation();
+		    if ($num <= 128) {
+			$label = _add_unwind_code($UWOP_ALLOC_SMALL, ($num - 8) / 8);
+		    } elsif ($num < 512 * 1024) {
+			$label = _add_unwind_code($UWOP_ALLOC_LARGE, 0, $num / 8);
+		    } elsif ($num < 4 * 1024 * 1024 * 1024) {
+			$label = _add_unwind_code($UWOP_ALLOC_LARGE, 1, $num & 0xffff, $num >> 16);	# little-endian: low half first
+		    } else {
+			die "stack allocation too large: $num"
+		    }
+		    last;
+		};
+		/^setframe$/ && do {
+		    if ($info{frame_reg} != 0) {
+			die "duplicate .seh_setframe directive";
+		    }
+		    if ($info{has_offset}) {
+			die "directives with an offset must come after .seh_setframe.";
+		    }
+		    $$line =~ /(%\w+)\s*,\s*(.+)/ or die "could not parse .seh_$dir";
+		    my $reg_num = $UWOP_REG_TO_NUMBER{$1} or die "unknown register $1";	# 0 (%rax) means "no frame register"
+		    my $offset = eval($2);
+		    if ($offset < 0 || $offset % 16 != 0 || $offset > 240) {
+			die "invalid offset: $offset";
+		    }
+		    $label = _add_unwind_code($UWOP_SET_FPREG, 0);	# runs _check_in_proc() before %info is written
+		    $info{frame_reg} = $reg_num;
+		    $info{frame_offset} = $offset;
+		    last;
+		};
+		/^savereg$/ && do {
+		    $$line =~ /(%\w+)\s*,\s*(.+)/ or die "could not parse .seh_$dir";
+		    my $reg_num = $UWOP_REG_TO_NUMBER{$1} // die "unknown register $1";	# %rax is 0, so test definedness
+		    my $offset = eval($2);
+		    if ($offset < 0 || $offset % 8 != 0) {
+			die "invalid offset: $offset";
+		    }
+		    if ($offset < 8 * 65536) {
+			$label = _add_unwind_code($UWOP_SAVE_NONVOL, $reg_num, $offset / 8);
+		    } else {
+			$label = _add_unwind_code($UWOP_SAVE_NONVOL_FAR, $reg_num, $offset & 0xffff, $offset >> 16);	# low half first
+		    }
+		    $info{has_offset} = 1;
+		    last;
+		};
+		/^savexmm128$/ && do {
+		    $$line =~ /%xmm(\d+)\s*,\s*(.+)/ or die "could not parse .seh_$dir";
+		    my $reg_num = $1;
+		    my $offset = eval($2);
+		    if ($offset < 0 || $offset % 16 != 0) {
+			die "invalid offset: $offset";
+		    }
+		    if ($offset < 16 * 65536) {
+			$label = _add_unwind_code($UWOP_SAVE_XMM128, $reg_num, $offset / 16);
+		    } else {
+			$label = _add_unwind_code($UWOP_SAVE_XMM128_FAR, $reg_num, $offset & 0xffff, $offset >> 16);	# low half first
+		    }
+		    $info{has_offset} = 1;
+		    last;
+		};
+		/^endproc$/ && do {
+		    $label = _endproc();
+		    last;
+		};
+		die "unknown SEH directive .seh_$dir";
+	    }
+
+	    # All SEH directives compile to labels inline. The other data is
+	    # emitted later.
+	    $$line = "";
+	    $label .= ":";
+	    return label->re(\$label);
+	}
+    }
+
+    sub pdata_and_xdata {	# Returns the accumulated .pdata/.xdata sections as assembly text.
+	return "" unless $win64;	# nothing is emitted for non-Windows targets
+
+	my $ret = "";
+	if ($pdata ne "") {	# $pdata holds the RUNTIME_FUNCTION entries added by _endproc
+	    $ret .= <<____;
+.section	.pdata
+.align	4
+$pdata
+____
+	}
+	if ($xdata ne "") {	# $xdata holds the UNWIND_INFO entries added by _endproc
+	    $ret .= <<____;
+.section	.xdata
+.align	4
+$xdata
+____
+	}
+	return $ret;
+    }
+}
 { package directive;	# pick up directives, which start with .
     sub re {
 	my	($class, $line) = @_;
@@ -717,8 +1035,9 @@
 	my	$ret;
 	my	$dir;
 
-	# chain-call to cfi_directive
+	# chain-call to cfi_directive and seh_directive.
 	$ret = cfi_directive->re($line) and return $ret;
+	$ret = seh_directive->re($line) and return $ret;
 
 	if ($$line =~ /^\s*(\.\w+)/) {
 	    bless $self,$class;
@@ -1190,8 +1509,8 @@
 ___
 }
 
-while(defined(my $line=<>)) {
-
+sub process_line {
+    my $line = shift;
     $line =~ s|\R$||;           # Better chomp
 
     if ($nasm) {
@@ -1271,6 +1590,13 @@
     print $line,"\n";
 }
 
+while(defined(my $line=<>)) {
+    process_line($line);
+}
+foreach my $line (split(/\n/, seh_directive->pdata_and_xdata())) {
+    process_line($line);
+}
+
 print "\n$current_segment\tENDS\n"	if ($current_segment && $masm);
 if ($masm) {
     print "END\n";
diff --git a/crypto/test/asm/trampoline-x86_64.pl b/crypto/test/asm/trampoline-x86_64.pl
index f6d8385..75c85ec 100755
--- a/crypto/test/asm/trampoline-x86_64.pl
+++ b/crypto/test/asm/trampoline-x86_64.pl
@@ -139,8 +139,8 @@
 .globl	abi_test_trampoline
 .align	16
 abi_test_trampoline:
-.Labi_test_trampoline_seh_begin:
 .cfi_startproc
+.seh_startproc
 	# Stack layout:
 	#   8 bytes - align
 	#   $caller_state_size bytes - saved caller registers
@@ -178,7 +178,7 @@
 $code .= <<____;
 	subq	\$$stack_alloc_size, %rsp
 .cfi_adjust_cfa_offset	$stack_alloc_size
-.Labi_test_trampoline_seh_prolog_alloc:
+.seh_allocstack	$stack_alloc_size
 ____
 $code .= <<____ if (!$win64);
 	movq	$unwind, $unwind_offset(%rsp)
@@ -186,20 +186,20 @@
 # Store our caller's state. This is needed because we modify it ourselves, and
 # also to isolate the test infrastruction from the function under test failing
 # to save some register.
-my %reg_offsets;
 $code .= store_caller_state($caller_state_offset, "%rsp", sub {
   my ($off, $reg) = @_;
   $reg = substr($reg, 1);
-  $reg_offsets{$reg} = $off;
-  $off -= $stack_alloc_size + 8;
+  # SEH records offsets relative to %rsp (when there is no frame pointer), while
+  # CFI records them relative to the CFA, the value of the parent's stack
+  # pointer just before the call.
+  my $cfi_off = $off - $stack_alloc_size - 8;
+  my $seh_dir = ".seh_savereg";
+  $seh_dir = ".seh_savexmm128" if ($reg =~ /^xmm/);
   return <<____;
-.cfi_offset	$reg, $off
-.Labi_test_trampoline_seh_prolog_$reg:
+.cfi_offset	$reg, $cfi_off
+$seh_dir	\%$reg, $off
 ____
 });
-$code .= <<____;
-.Labi_test_trampoline_seh_prolog_end:
-____
 
 $code .= load_caller_state(0, $state);
 $code .= <<____;
@@ -295,7 +295,7 @@
 	# %rax already contains \$func's return value, unmodified.
 	ret
 .cfi_endproc
-.Labi_test_trampoline_seh_end:
+.seh_endproc
 .size	abi_test_trampoline,.-abi_test_trampoline
 ____
 
@@ -334,10 +334,10 @@
 .align	16
 abi_test_bad_unwind_wrong_register:
 .cfi_startproc
-.Labi_test_bad_unwind_wrong_register_seh_begin:
+.seh_startproc
 	pushq	%r12
-.cfi_push	%r13	# This should be %r12
-.Labi_test_bad_unwind_wrong_register_seh_push_r13:
+.cfi_push	%r13	# This should be %r12
+.seh_pushreg	%r13	# This should be %r12
 	# Windows evaluates epilogs directly in the unwinder, rather than using
 	# unwind codes. Add a nop so there is one non-epilog point (immediately
 	# before the nop) where the unwinder can observe the mistake.
@@ -345,7 +345,7 @@
 	popq	%r12
 .cfi_pop	%r12
 	ret
-.Labi_test_bad_unwind_wrong_register_seh_end:
+.seh_endproc
 .cfi_endproc
 .size	abi_test_bad_unwind_wrong_register,.-abi_test_bad_unwind_wrong_register
 
@@ -357,10 +357,10 @@
 .align	16
 abi_test_bad_unwind_temporary:
 .cfi_startproc
-.Labi_test_bad_unwind_temporary_seh_begin:
+.seh_startproc
 	pushq	%r12
 .cfi_push	%r12
-.Labi_test_bad_unwind_temporary_seh_push_r12:
+.seh_pushreg	%r12
 
 	movq	%r12, %rax
 	inc	%rax
@@ -374,8 +374,8 @@
 	popq	%r12
 .cfi_pop	%r12
 	ret
-.Labi_test_bad_unwind_temporary_seh_end:
 .cfi_endproc
+.seh_endproc
 .size	abi_test_bad_unwind_temporary,.-abi_test_bad_unwind_temporary
 
 # abi_test_get_and_clear_direction_flag clears the direction flag. If the flag
@@ -412,9 +412,9 @@
 .globl	abi_test_bad_unwind_epilog
 .align	16
 abi_test_bad_unwind_epilog:
-.Labi_test_bad_unwind_epilog_seh_begin:
+.seh_startproc
 	pushq	%r12
-.Labi_test_bad_unwind_epilog_seh_push_r12:
+.seh_pushreg	%r12
 
 	nop
 
@@ -422,137 +422,9 @@
 	popq	%r12
 	nop
 	ret
-.Labi_test_bad_unwind_epilog_seh_end:
+.seh_endproc
 .size	abi_test_bad_unwind_epilog,.-abi_test_bad_unwind_epilog
 ____
-
-  # Add unwind metadata for SEH.
-  #
-  # TODO(davidben): This is all manual right now. Once we've added SEH tests,
-  # add support for emitting these in x86_64-xlate.pl, probably based on MASM
-  # and Yasm's unwind directives, and unify with CFI. (Sadly, NASM does not
-  # support these directives.) Then push that upstream to replace the
-  # error-prone and non-standard custom handlers.
-
-  # See https://docs.microsoft.com/en-us/cpp/build/struct-unwind-code?view=vs-2017
-  my $UWOP_PUSH_NONVOL = 0;
-  my $UWOP_ALLOC_LARGE = 1;
-  my $UWOP_ALLOC_SMALL = 2;
-  my $UWOP_SAVE_NONVOL = 4;
-  my $UWOP_SAVE_XMM128 = 8;
-
-  my %UWOP_REG_NUMBER = (rax => 0, rcx => 1, rdx => 2, rbx => 3, rsp => 4,
-                         rbp => 5, rsi => 6, rdi => 7,
-                         map(("r$_" => $_), (8..15)));
-
-  my $unwind_codes = "";
-  my $num_slots = 0;
-  foreach my $reg (reverse @caller_state) {
-    $reg = substr($reg, 1);
-    die "unknown register $reg" unless exists($reg_offsets{$reg});
-    if ($reg =~ /^r/) {
-      die "unknown register $reg" unless exists($UWOP_REG_NUMBER{$reg});
-      my $info = $UWOP_SAVE_NONVOL | ($UWOP_REG_NUMBER{$reg} << 4);
-      my $value = $reg_offsets{$reg} / 8;
-      $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
-	.byte	$info
-	.value	$value
-____
-      $num_slots += 2;
-    } elsif ($reg =~ /^xmm/) {
-      my $info = $UWOP_SAVE_XMM128 | (substr($reg, 3) << 4);
-      my $value = $reg_offsets{$reg} / 16;
-      $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_seh_prolog_$reg-.Labi_test_trampoline_seh_begin
-	.byte	$info
-	.value	$value
-____
-      $num_slots += 2;
-    } else {
-      die "unknown register $reg";
-    }
-  }
-
-  if ($stack_alloc_size <= 128) {
-    my $info = $UWOP_ALLOC_SMALL | ((($stack_alloc_size - 8) / 8) << 4);
-    $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
-	.byte	$info
-____
-    $num_slots++;
-  } else {
-    die "stack allocation needs three unwind slots" if ($stack_alloc_size > 512 * 1024 + 8);
-    my $info = $UWOP_ALLOC_LARGE;
-    my $value = $stack_alloc_size / 8;
-    $unwind_codes .= <<____;
-	.byte	.Labi_test_trampoline_seh_prolog_alloc-.Labi_test_trampoline_seh_begin
-	.byte	$info
-	.value	$value
-____
-    $num_slots += 2;
-  }
-
-  $code .= <<____;
-.section	.pdata
-.align	4
-	# https://docs.microsoft.com/en-us/cpp/build/struct-runtime-function?view=vs-2017
-	.rva	.Labi_test_trampoline_seh_begin
-	.rva	.Labi_test_trampoline_seh_end
-	.rva	.Labi_test_trampoline_seh_info
-
-	.rva	.Labi_test_bad_unwind_wrong_register_seh_begin
-	.rva	.Labi_test_bad_unwind_wrong_register_seh_end
-	.rva	.Labi_test_bad_unwind_wrong_register_seh_info
-
-	.rva	.Labi_test_bad_unwind_temporary_seh_begin
-	.rva	.Labi_test_bad_unwind_temporary_seh_end
-	.rva	.Labi_test_bad_unwind_temporary_seh_info
-
-	.rva	.Labi_test_bad_unwind_epilog_seh_begin
-	.rva	.Labi_test_bad_unwind_epilog_seh_end
-	.rva	.Labi_test_bad_unwind_epilog_seh_info
-
-.section	.xdata
-.align	8
-.Labi_test_trampoline_seh_info:
-	# https://docs.microsoft.com/en-us/cpp/build/struct-unwind-info?view=vs-2017
-	.byte	1	# version 1, no flags
-	.byte	.Labi_test_trampoline_seh_prolog_end-.Labi_test_trampoline_seh_begin
-	.byte	$num_slots
-	.byte	0	# no frame register
-$unwind_codes
-
-.align	8
-.Labi_test_bad_unwind_wrong_register_seh_info:
-	.byte	1	# version 1, no flags
-	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
-	.byte	1	# one slot
-	.byte	0	# no frame register
-
-	.byte	.Labi_test_bad_unwind_wrong_register_seh_push_r13-.Labi_test_bad_unwind_wrong_register_seh_begin
-	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r13} << 4)]}
-
-.align	8
-.Labi_test_bad_unwind_temporary_seh_info:
-	.byte	1	# version 1, no flags
-	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
-	.byte	1	# one slot
-	.byte	0	# no frame register
-
-	.byte	.Labi_test_bad_unwind_temporary_seh_push_r12-.Labi_test_bad_unwind_temporary_seh_begin
-	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
-
-.align	8
-.Labi_test_bad_unwind_epilog_seh_info:
-	.byte	1	# version 1, no flags
-	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
-	.byte	1	# one slot
-	.byte	0	# no frame register
-
-	.byte	.Labi_test_bad_unwind_epilog_seh_push_r12-.Labi_test_bad_unwind_epilog_seh_begin
-	.byte	@{[$UWOP_PUSH_NONVOL | ($UWOP_REG_NUMBER{r12} << 4)]}
-____
 }
 
 print $code;