Document expected use of BTI and PAC macros.

The commit message of
https://boringssl-review.googlesource.com/c/boringssl/+/42084 did a good
job of explaining how BTI and PAC work, but we're missing some
documentation in the header on conventions. I think these are right?

Bug: 409
Change-Id: I959e68d3ca076d0bdf9d1f2b5a5f0450023de4d6
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/47204
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/include/openssl/arm_arch.h b/include/openssl/arm_arch.h
index 31ff8a6..81dc796 100644
--- a/include/openssl/arm_arch.h
+++ b/include/openssl/arm_arch.h
@@ -124,7 +124,72 @@
 //   - Armv8.5-A Branch Target Identification
 // features which require emitting a .note.gnu.property section with the
 // appropriate architecture-dependent feature bits set.
-// Read more: "ELF for the Arm® 64-bit Architecture"
+//
+// |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to
+// PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be
+// used immediately before saving the link register (LR, x30) to the stack.
+// |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring
+// it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone
+// with |AARCH64_VALIDATE_LINK_REGISTER| before RET. SP is the signing modifier,
+// so it must also have the same value at the two points. For example:
+//
+//   .globl f
+//   f:
+//     AARCH64_SIGN_LINK_REGISTER
+//     stp x29, x30, [sp, #-96]!
+//     mov x29, sp
+//     ...
+//     ldp x29, x30, [sp], #96
+//     AARCH64_VALIDATE_LINK_REGISTER
+//     ret
+//
+// |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or
+// |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an
+// indirect call target. In particular, all symbols exported from a file must
+// begin with one of these macros. For example, a leaf function that does not
+// save LR can instead use |AARCH64_VALID_CALL_TARGET|:
+//
+//   .globl return_zero
+//   return_zero:
+//     AARCH64_VALID_CALL_TARGET
+//     mov x0, #0
+//     ret
+//
+// A non-leaf function which does not immediately save LR may need both macros
+// because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function
+// may jump to an alternate implementation before setting up the stack:
+//
+//   .globl with_early_jump
+//   with_early_jump:
+//     AARCH64_VALID_CALL_TARGET
+//     cmp x0, #128
+//     b.lt .Lwith_early_jump_128
+//     AARCH64_SIGN_LINK_REGISTER
+//     stp x29, x30, [sp, #-96]!
+//     mov x29, sp
+//     ...
+//     ldp x29, x30, [sp], #96
+//     AARCH64_VALIDATE_LINK_REGISTER
+//     ret
+//
+//   .Lwith_early_jump_128:
+//     ...
+//     ret
+//
+// These annotations are only required with indirect calls. Private symbols that
+// are only the target of direct calls do not require annotations. Also note
+// that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not
+// general indirect jumps (BR via registers other than x16/x17). Such jumps in
+// assembly are currently unsupported and would require a macro for BTI 'j'.
+//
+// Although not necessary, it is safe to use these macros in 32-bit ARM
+// assembly. This may be used to simplify dual 32-bit and 64-bit files.
+//
+// References:
+// - "ELF for the Arm® 64-bit Architecture"
+//   https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst
+// - "Providing protection for complex software"
+//   https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software
 
 #if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1
 #define GNU_PROPERTY_AARCH64_BTI (1 << 0)   // Has Branch Target Identification