ppc64le delocate: avoid r0 as a base register.

On POWER, some instructions treat r0 in certain operand positions as the
literal value zero rather than as the register's contents. The base
register of a load is one such position. Thus, when rewriting a load whose
destination is r0, we cannot use r0 to hold the base address.

This could be more efficient, but loading to r0 appears to be very rare
so I'm not going to worry about it for now.

Change-Id: I14dac96ba4c0380b166a7667b0cba918f1ae25ec
Reviewed-on: https://boringssl-review.googlesource.com/17065
Commit-Queue: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
diff --git a/util/fipstools/delocate.go b/util/fipstools/delocate.go
index 75eb194..a479929 100644
--- a/util/fipstools/delocate.go
+++ b/util/fipstools/delocate.go
@@ -709,8 +709,23 @@
 						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
 					}
 
+					baseReg := destReg
+					if baseReg == "0" {
+						// Register zero is special as the base register for a load.
+						// Avoid it by spilling and using r3 instead.
+						baseReg = "3"
+						wrappers = append(wrappers, func(k func()) {
+							d.output.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
+							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
+							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
+							k()
+							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
+							d.output.WriteString("\taddi 1, 1, 288\n")   // Restore the stack pointer.
+						})
+					}
+
 					wrappers = append(wrappers, func(k func()) {
-						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + destReg + ")\n")
+						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
 					})
 				default:
 					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
diff --git a/util/fipstools/delocate_test.go b/util/fipstools/delocate_test.go
index 6eae786..e0ecc17 100644
--- a/util/fipstools/delocate_test.go
+++ b/util/fipstools/delocate_test.go
@@ -38,15 +38,16 @@
 }
 
 var delocateTests = []delocateTest{
-	{"x86_64-Basic", []string{"in.s"}, "out.s"},
-	{"x86_64-Sections", []string{"in.s"}, "out.s"},
-	{"x86_64-LabelRewrite", []string{"in1.s", "in2.s"}, "out.s"},
-	{"x86_64-GOTRewrite", []string{"in.s"}, "out.s"},
-	{"x86_64-BSS", []string{"in.s"}, "out.s"},
-	{"ppc64le-Sample", []string{"in.s"}, "out.s"},
-	{"ppc64le-Sample2", []string{"in.s"}, "out.s"},
-	{"ppc64le-TOCWithOffset", []string{"in.s"}, "out.s"},
 	{"ppc64le-GlobalEntry", []string{"in.s"}, "out.s"},
+	{"ppc64le-LoadToR0", []string{"in.s"}, "out.s"},
+	{"ppc64le-Sample2", []string{"in.s"}, "out.s"},
+	{"ppc64le-Sample", []string{"in.s"}, "out.s"},
+	{"ppc64le-TOCWithOffset", []string{"in.s"}, "out.s"},
+	{"x86_64-Basic", []string{"in.s"}, "out.s"},
+	{"x86_64-BSS", []string{"in.s"}, "out.s"},
+	{"x86_64-GOTRewrite", []string{"in.s"}, "out.s"},
+	{"x86_64-LabelRewrite", []string{"in1.s", "in2.s"}, "out.s"},
+	{"x86_64-Sections", []string{"in.s"}, "out.s"},
 }
 
 func TestDelocate(t *testing.T) {
diff --git a/util/fipstools/testdata/ppc64le-LoadToR0/in.s b/util/fipstools/testdata/ppc64le-LoadToR0/in.s
new file mode 100644
index 0000000..81766dc
--- /dev/null
+++ b/util/fipstools/testdata/ppc64le-LoadToR0/in.s
@@ -0,0 +1,4 @@
+	.text
+foo:
+	addis 22,2,bar@toc@ha
+	ld 0,bar@toc@l(22)
diff --git a/util/fipstools/testdata/ppc64le-LoadToR0/out.s b/util/fipstools/testdata/ppc64le-LoadToR0/out.s
new file mode 100644
index 0000000..c42dbe0
--- /dev/null
+++ b/util/fipstools/testdata/ppc64le-LoadToR0/out.s
@@ -0,0 +1,101 @@
+.text
+BORINGSSL_bcm_text_start:
+	.text
+.Lfoo_local_target:
+foo:
+# WAS addis 22,2,bar@toc@ha
+# WAS ld 0,bar@toc@l(22)
+	addi 1, 1, -288
+	mflr 0
+	std 0, -8(1)
+	std 3, -16(1)
+	bl .Lbcm_loadtoc_bar
+	std 3, -24(1)
+	ld 3, -8(1)
+	mtlr 3
+	ld 0, -24(1)
+	ld 3, -16(1)
+	addi 1, 1, 288
+	addi 1, 1, -288
+	std 3, -8(1)
+	mr 3, 0
+	ld 0, 0(3)
+	ld 3, -8(1)
+	addi 1, 1, 288
+.text
+BORINGSSL_bcm_text_end:
+.type bcm_loadtoc_bar, @function
+bcm_loadtoc_bar:
+.Lbcm_loadtoc_bar:
+	addis 3, 2, bar@toc@ha
+	addi 3, 3, bar@toc@l
+	blr
+.LBORINGSSL_external_toc:
+.quad .TOC.-.LBORINGSSL_external_toc
+.type BORINGSSL_bcm_text_hash, @object
+.size BORINGSSL_bcm_text_hash, 64
+BORINGSSL_bcm_text_hash:
+.byte 0xae
+.byte 0x2c
+.byte 0xea
+.byte 0x2a
+.byte 0xbd
+.byte 0xa6
+.byte 0xf3
+.byte 0xec
+.byte 0x97
+.byte 0x7f
+.byte 0x9b
+.byte 0xf6
+.byte 0x94
+.byte 0x9a
+.byte 0xfc
+.byte 0x83
+.byte 0x68
+.byte 0x27
+.byte 0xcb
+.byte 0xa0
+.byte 0xa0
+.byte 0x9f
+.byte 0x6b
+.byte 0x6f
+.byte 0xde
+.byte 0x52
+.byte 0xcd
+.byte 0xe2
+.byte 0xcd
+.byte 0xff
+.byte 0x31
+.byte 0x80
+.byte 0xa2
+.byte 0xd4
+.byte 0xc3
+.byte 0x66
+.byte 0xf
+.byte 0xc2
+.byte 0x6a
+.byte 0x7b
+.byte 0xf4
+.byte 0xbe
+.byte 0x39
+.byte 0xa2
+.byte 0xd7
+.byte 0x25
+.byte 0xdb
+.byte 0x21
+.byte 0x98
+.byte 0xe9
+.byte 0xd5
+.byte 0x53
+.byte 0xbf
+.byte 0x5c
+.byte 0x32
+.byte 0x6
+.byte 0x83
+.byte 0x34
+.byte 0xc
+.byte 0x65
+.byte 0x89
+.byte 0x52
+.byte 0xbd
+.byte 0x1f