Batch inversions in Trust Tokens.

The DLEQ and DLEQOR proofs require converting many Jacobian points to
affine, some multiple times. The inversions involved can be batched.

This buys us a +5-8% improvement in token issuance speed. issue and
finish_issue should each only perform two inversions per token now. We
could save an inversion per token by changing the dleq_generate and
dleq_verify function signatures, but that would complicate the likely
more valuable batched DLEQ(OR) optimization, so I've left those alone.

Before:
Did 300 TrustToken-Exp0-Batch1 generate_key operations in 2031798us (147.7 ops/sec)
Did 1449 TrustToken-Exp0-Batch1 begin_issuance operations in 2093639us (692.1 ops/sec)
Did 96 TrustToken-Exp0-Batch1 issue operations in 2044310us (47.0 ops/sec)
Did 84 TrustToken-Exp0-Batch1 finish_issuance operations in 2072137us (40.5 ops/sec)
Did 12170000 TrustToken-Exp0-Batch1 begin_redemption operations in 2000098us (6084701.8 ops/sec)
Did 315 TrustToken-Exp0-Batch1 redeem operations in 2091938us (150.6 ops/sec)
Did 35000 TrustToken-Exp0-Batch1 finish_redemption operations in 2004900us (17457.2 ops/sec)
Did 308 TrustToken-Exp0-Batch10 generate_key operations in 2067860us (148.9 ops/sec)
Did 138 TrustToken-Exp0-Batch10 begin_issuance operations in 2005706us (68.8 ops/sec)
Did 9 TrustToken-Exp0-Batch10 issue operations in 2107753us (4.3 ops/sec)
Did 8 TrustToken-Exp0-Batch10 finish_issuance operations in 2193489us (3.6 ops/sec)
Did 12046750 TrustToken-Exp0-Batch10 begin_redemption operations in 2000025us (6023299.7 ops/sec)
Did 315 TrustToken-Exp0-Batch10 redeem operations in 2091940us (150.6 ops/sec)
Did 35000 TrustToken-Exp0-Batch10 finish_redemption operations in 2008851us (17422.9 ops/sec)

Did 756 TrustToken-Exp1-Batch1 generate_key operations in 2051005us (368.6 ops/sec)
Did 3633 TrustToken-Exp1-Batch1 begin_issuance operations in 2072577us (1752.9 ops/sec)
Did 242 TrustToken-Exp1-Batch1 issue operations in 2052091us (117.9 ops/sec)
Did 210 TrustToken-Exp1-Batch1 finish_issuance operations in 2058740us (102.0 ops/sec)
Did 12477000 TrustToken-Exp1-Batch1 begin_redemption operations in 2000004us (6238487.5 ops/sec)
Did 777 TrustToken-Exp1-Batch1 redeem operations in 2084953us (372.7 ops/sec)
Did 35000 TrustToken-Exp1-Batch1 finish_redemption operations in 2028286us (17255.9 ops/sec)
Did 756 TrustToken-Exp1-Batch10 generate_key operations in 2051178us (368.6 ops/sec)
Did 357 TrustToken-Exp1-Batch10 begin_issuance operations in 2041875us (174.8 ops/sec)
Did 23 TrustToken-Exp1-Batch10 issue operations in 2026494us (11.3 ops/sec)
Did 20 TrustToken-Exp1-Batch10 finish_issuance operations in 2048478us (9.8 ops/sec)
Did 12492000 TrustToken-Exp1-Batch10 begin_redemption operations in 2000053us (6245834.5 ops/sec)
Did 777 TrustToken-Exp1-Batch10 redeem operations in 2084956us (372.7 ops/sec)
Did 36000 TrustToken-Exp1-Batch10 finish_redemption operations in 2021991us (17804.2 ops/sec)

After:
Did 315 TrustToken-Exp0-Batch1 generate_key operations in 2046638us (153.9 ops/sec) [+4.2%]
Did 1449 TrustToken-Exp0-Batch1 begin_issuance operations in 2087930us (694.0 ops/sec) [+0.3%]
Did 105 TrustToken-Exp0-Batch1 issue operations in 2071104us (50.7 ops/sec) [+8.0%]
Did 88 TrustToken-Exp0-Batch1 finish_issuance operations in 2023502us (43.5 ops/sec) [+7.3%]
Did 11847000 TrustToken-Exp0-Batch1 begin_redemption operations in 2000041us (5923378.6 ops/sec) [-2.7%]
Did 315 TrustToken-Exp0-Batch1 redeem operations in 2084116us (151.1 ops/sec) [+0.4%]
Did 35000 TrustToken-Exp0-Batch1 finish_redemption operations in 2003732us (17467.4 ops/sec) [+0.1%]
Did 315 TrustToken-Exp0-Batch10 generate_key operations in 2046863us (153.9 ops/sec) [+3.3%]
Did 138 TrustToken-Exp0-Batch10 begin_issuance operations in 2000108us (69.0 ops/sec) [+0.3%]
Did 10 TrustToken-Exp0-Batch10 issue operations in 2149283us (4.7 ops/sec) [+9.0%]
Did 8 TrustToken-Exp0-Batch10 finish_issuance operations in 2046416us (3.9 ops/sec) [+7.2%]
Did 12112000 TrustToken-Exp0-Batch10 begin_redemption operations in 2000077us (6055766.9 ops/sec) [+0.5%]
Did 315 TrustToken-Exp0-Batch10 redeem operations in 2084427us (151.1 ops/sec) [+0.4%]
Did 35000 TrustToken-Exp0-Batch10 finish_redemption operations in 2015111us (17368.8 ops/sec) [-0.3%]

Did 777 TrustToken-Exp1-Batch1 generate_key operations in 2029777us (382.8 ops/sec) [+3.9%]
Did 3654 TrustToken-Exp1-Batch1 begin_issuance operations in 2093484us (1745.4 ops/sec) [-0.4%]
Did 252 TrustToken-Exp1-Batch1 issue operations in 2024557us (124.5 ops/sec) [+5.5%]
Did 220 TrustToken-Exp1-Batch1 finish_issuance operations in 2034633us (108.1 ops/sec) [+6.0%]
Did 12659000 TrustToken-Exp1-Batch1 begin_redemption operations in 2000112us (6329145.6 ops/sec) [+1.5%]
Did 777 TrustToken-Exp1-Batch1 redeem operations in 2073783us (374.7 ops/sec) [+0.5%]
Did 35000 TrustToken-Exp1-Batch1 finish_redemption operations in 2050371us (17070.1 ops/sec) [-1.1%]
Did 768 TrustToken-Exp1-Batch10 generate_key operations in 2025482us (379.2 ops/sec) [+2.9%]
Did 357 TrustToken-Exp1-Batch10 begin_issuance operations in 2034429us (175.5 ops/sec) [+0.4%]
Did 25 TrustToken-Exp1-Batch10 issue operations in 2049293us (12.2 ops/sec) [+7.5%]
Did 21 TrustToken-Exp1-Batch10 finish_issuance operations in 2022256us (10.4 ops/sec) [+6.4%]
Did 12702000 TrustToken-Exp1-Batch10 begin_redemption operations in 2000015us (6350952.4 ops/sec) [+1.7%]
Did 777 TrustToken-Exp1-Batch10 redeem operations in 2072048us (375.0 ops/sec) [+0.6%]
Did 35000 TrustToken-Exp1-Batch10 finish_redemption operations in 2024580us (17287.5 ops/sec) [-2.9%]

Change-Id: Ia1b09cd14aa8ce0935d18033fb4bd75666a258e9
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/41086
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Steven Valdez <svaldez@google.com>
diff --git a/crypto/fipsmodule/ec/ec.c b/crypto/fipsmodule/ec/ec.c
index 56a9e1d..79dc416 100644
--- a/crypto/fipsmodule/ec/ec.c
+++ b/crypto/fipsmodule/ec/ec.c
@@ -809,6 +809,15 @@
   return group->meth->point_get_affine_coordinates(group, p, &out->X, &out->Y);
 }
 
+int ec_jacobian_to_affine_batch(const EC_GROUP *group, EC_AFFINE *out,
+                                const EC_RAW_POINT *in, size_t num) {
+  if (group->meth->jacobian_to_affine_batch == NULL) {
+    OPENSSL_PUT_ERROR(EC, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
+  return group->meth->jacobian_to_affine_batch(group, out, in, num);
+}
+
 int ec_point_set_affine_coordinates(const EC_GROUP *group, EC_AFFINE *out,
                                     const EC_FELEM *x, const EC_FELEM *y) {
   void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
@@ -1046,6 +1055,12 @@
   ec_felem_select(group, &out->Z, mask, &a->Z, &b->Z);
 }
 
+void ec_affine_select(const EC_GROUP *group, EC_AFFINE *out, BN_ULONG mask,
+                      const EC_AFFINE *a, const EC_AFFINE *b) {
+  ec_felem_select(group, &out->X, mask, &a->X, &b->X);
+  ec_felem_select(group, &out->Y, mask, &a->Y, &b->Y);
+}
+
 int ec_cmp_x_coordinate(const EC_GROUP *group, const EC_RAW_POINT *p,
                         const EC_SCALAR *r) {
   return group->meth->cmp_x_coordinate(group, p, r);
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index 02bd66c..de515ab 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -200,6 +200,52 @@
   return 1;
 }
 
+static int ec_GFp_mont_jacobian_to_affine_batch(const EC_GROUP *group,
+                                                EC_AFFINE *out,
+                                                const EC_RAW_POINT *in,
+                                                size_t num) {
+  if (num == 0) {
+    return 1;
+  }
+
+  // Compute prefix products of all Zs. Use |out[i].X| as scratch space
+  // to store these values.
+  out[0].X = in[0].Z;
+  for (size_t i = 1; i < num; i++) {
+    ec_GFp_mont_felem_mul(group, &out[i].X, &out[i - 1].X, &in[i].Z);
+  }
+
+  // Some input was infinity iff the product of all Zs is zero.
+  if (ec_felem_non_zero_mask(group, &out[num - 1].X) == 0) {
+    OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
+    return 0;
+  }
+
+  // Invert the product of all Zs.
+  EC_FELEM zinvprod;
+  ec_GFp_mont_felem_inv0(group, &zinvprod, &out[num - 1].X);
+  for (size_t i = num - 1; i < num; i--) {
+    // Our loop invariant is that |zinvprod| is Z0^-1 * Z1^-1 * ... * Zi^-1.
+    // Recover Zi^-1 by multiplying by the previous product.
+    EC_FELEM zinv, zinv2;
+    if (i == 0) {
+      zinv = zinvprod;
+    } else {
+      ec_GFp_mont_felem_mul(group, &zinv, &zinvprod, &out[i - 1].X);
+      // Maintain the loop invariant for the next iteration.
+      ec_GFp_mont_felem_mul(group, &zinvprod, &zinvprod, &in[i].Z);
+    }
+
+    // Compute affine coordinates: x = X * Z^-2 and y = Y * Z^-3.
+    ec_GFp_mont_felem_sqr(group, &zinv2, &zinv);
+    ec_GFp_mont_felem_mul(group, &out[i].X, &in[i].X, &zinv2);
+    ec_GFp_mont_felem_mul(group, &out[i].Y, &in[i].Y, &zinv2);
+    ec_GFp_mont_felem_mul(group, &out[i].Y, &out[i].Y, &zinv);
+  }
+
+  return 1;
+}
+
 void ec_GFp_mont_add(const EC_GROUP *group, EC_RAW_POINT *out,
                      const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
   if (a == b) {
@@ -456,6 +502,7 @@
   out->group_finish = ec_GFp_mont_group_finish;
   out->group_set_curve = ec_GFp_mont_group_set_curve;
   out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
+  out->jacobian_to_affine_batch = ec_GFp_mont_jacobian_to_affine_batch;
   out->add = ec_GFp_mont_add;
   out->dbl = ec_GFp_mont_dbl;
   out->mul = ec_GFp_mont_mul;
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index ef6014c..fed81a5 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -282,6 +282,15 @@
 int ec_jacobian_to_affine(const EC_GROUP *group, EC_AFFINE *out,
                           const EC_RAW_POINT *p);
 
+// ec_jacobian_to_affine_batch converts |num| points in |in| from Jacobian
+// coordinates to affine coordinates and writes the results to |out|. It returns
+// one on success and zero if any of the input points were infinity.
+//
+// This function is not implemented for all curves. Add implementations as
+// needed.
+int ec_jacobian_to_affine_batch(const EC_GROUP *group, EC_AFFINE *out,
+                                const EC_RAW_POINT *in, size_t num);
+
 // ec_point_set_affine_coordinates sets |out|'s to a point with affine
 // coordinates |x| and |y|. It returns one if the point is on the curve and
 // zero otherwise. If the point is not on the curve, the value of |out| is
@@ -313,6 +322,10 @@
 void ec_point_select(const EC_GROUP *group, EC_RAW_POINT *out, BN_ULONG mask,
                      const EC_RAW_POINT *a, const EC_RAW_POINT *b);
 
+// ec_affine_select behaves like |ec_point_select| but acts on affine points.
+void ec_affine_select(const EC_GROUP *group, EC_AFFINE *out, BN_ULONG mask,
+                      const EC_AFFINE *a, const EC_AFFINE *b);
+
 // ec_cmp_x_coordinate compares the x (affine) coordinate of |p|, mod the group
 // order, with |r|. It returns one if the values match and zero if |p| is the
 // point at infinity of the values do not match.
@@ -350,6 +363,11 @@
 // external APIs not checking the return value.
 void ec_set_to_safe_point(const EC_GROUP *group, EC_RAW_POINT *out);
 
+// ec_affine_jacobian_equal returns one if |a| and |b| represent the same point
+// and zero otherwise. It treats both inputs as secret.
+int ec_affine_jacobian_equal(const EC_GROUP *group, const EC_AFFINE *a,
+                             const EC_RAW_POINT *b);
+
 
 // Implementation details.
 
@@ -365,6 +383,10 @@
   int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_RAW_POINT *p,
                                       EC_FELEM *x, EC_FELEM *y);
 
+  // jacobian_to_affine_batch implements |ec_jacobian_to_affine_batch|.
+  int (*jacobian_to_affine_batch)(const EC_GROUP *group, EC_AFFINE *out,
+                                  const EC_RAW_POINT *in, size_t num);
+
   // add sets |r| to |a| + |b|.
   void (*add)(const EC_GROUP *group, EC_RAW_POINT *r, const EC_RAW_POINT *a,
               const EC_RAW_POINT *b);
diff --git a/crypto/fipsmodule/ec/simple.c b/crypto/fipsmodule/ec/simple.c
index bd420b8..b6d9312 100644
--- a/crypto/fipsmodule/ec/simple.c
+++ b/crypto/fipsmodule/ec/simple.c
@@ -284,6 +284,36 @@
   return equal & 1;
 }
 
+int ec_affine_jacobian_equal(const EC_GROUP *group, const EC_AFFINE *a,
+                             const EC_RAW_POINT *b) {
+  // If |b| is not infinity, we have to decide whether
+  //     (X_a, Y_a) = (X_b/Z_b^2, Y_b/Z_b^3),
+  // or equivalently, whether
+  //     (X_a*Z_b^2, Y_a*Z_b^3) = (X_b, Y_b).
+
+  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
+                          const EC_FELEM *b) = group->meth->felem_mul;
+  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
+      group->meth->felem_sqr;
+
+  EC_FELEM tmp, Zb2;
+  felem_sqr(group, &Zb2, &b->Z);        // Zb2 = Z_b^2
+  felem_mul(group, &tmp, &a->X, &Zb2);  // tmp = X_a * Z_b^2
+  ec_felem_sub(group, &tmp, &tmp, &b->X);
+  const BN_ULONG x_not_equal = ec_felem_non_zero_mask(group, &tmp);
+
+  felem_mul(group, &tmp, &a->Y, &Zb2);  // tmp = Y_a * Z_b^2
+  felem_mul(group, &tmp, &tmp, &b->Z);  // tmp = Y_a * Z_b^3
+  ec_felem_sub(group, &tmp, &tmp, &b->Y);
+  const BN_ULONG y_not_equal = ec_felem_non_zero_mask(group, &tmp);
+  const BN_ULONG x_and_y_equal = ~(x_not_equal | y_not_equal);
+
+  const BN_ULONG b_not_infinity = ec_felem_non_zero_mask(group, &b->Z);
+
+  const BN_ULONG equal = b_not_infinity & x_and_y_equal;
+  return equal & 1;
+}
+
 int ec_GFp_simple_cmp_x_coordinate(const EC_GROUP *group, const EC_RAW_POINT *p,
                                    const EC_SCALAR *r) {
   if (ec_GFp_simple_is_at_infinity(group, p)) {
diff --git a/crypto/trust_token/internal.h b/crypto/trust_token/internal.h
index 1c82d83..6c9904d 100644
--- a/crypto/trust_token/internal.h
+++ b/crypto/trust_token/internal.h
@@ -42,9 +42,9 @@
 #define PMBTOKEN_NONCE_SIZE 64
 
 typedef struct {
-  EC_RAW_POINT pub0;
-  EC_RAW_POINT pub1;
-  EC_RAW_POINT pubs;
+  EC_AFFINE pub0;
+  EC_AFFINE pub1;
+  EC_AFFINE pubs;
 } PMBTOKEN_CLIENT_KEY;
 
 typedef struct {
@@ -54,9 +54,9 @@
   EC_SCALAR y1;
   EC_SCALAR xs;
   EC_SCALAR ys;
-  EC_RAW_POINT pub0;
-  EC_RAW_POINT pub1;
-  EC_RAW_POINT pubs;
+  EC_AFFINE pub0;
+  EC_AFFINE pub1;
+  EC_AFFINE pubs;
 } PMBTOKEN_ISSUER_KEY;
 
 // PMBTOKEN_PRETOKEN represents the intermediate state a client keeps during a
@@ -64,7 +64,7 @@
 typedef struct pmb_pretoken_st {
   uint8_t t[PMBTOKEN_NONCE_SIZE];
   EC_SCALAR r;
-  EC_RAW_POINT Tp;
+  EC_AFFINE Tp;
 } PMBTOKEN_PRETOKEN;
 
 // PMBTOKEN_PRETOKEN_free releases the memory associated with |token|.
diff --git a/crypto/trust_token/pmbtoken.c b/crypto/trust_token/pmbtoken.c
index 5b1b883..f1dc39b 100644
--- a/crypto/trust_token/pmbtoken.c
+++ b/crypto/trust_token/pmbtoken.c
@@ -39,7 +39,7 @@
                 const uint8_t t[PMBTOKEN_NONCE_SIZE]);
   // hash_s implements the H_s operation in PMBTokens. It returns on on success
   // and zero on error.
-  int (*hash_s)(const EC_GROUP *group, EC_RAW_POINT *out, const EC_RAW_POINT *t,
+  int (*hash_s)(const EC_GROUP *group, EC_RAW_POINT *out, const EC_AFFINE *t,
                 const uint8_t s[PMBTOKEN_NONCE_SIZE]);
   // hash_c implements the H_c operation in PMBTokens. It returns on on success
   // and zero on error.
@@ -112,32 +112,26 @@
 }
 
 static int point_to_cbb(CBB *out, const EC_GROUP *group,
-                        const EC_RAW_POINT *point) {
-  EC_AFFINE affine;
-  if (!ec_jacobian_to_affine(group, &affine, point)) {
-    return 0;
-  }
+                        const EC_AFFINE *point) {
   size_t len =
-      ec_point_to_bytes(group, &affine, POINT_CONVERSION_UNCOMPRESSED, NULL, 0);
+      ec_point_to_bytes(group, point, POINT_CONVERSION_UNCOMPRESSED, NULL, 0);
   if (len == 0) {
     return 0;
   }
   uint8_t *p;
   return CBB_add_space(out, &p, len) &&
-         ec_point_to_bytes(group, &affine, POINT_CONVERSION_UNCOMPRESSED, p,
+         ec_point_to_bytes(group, point, POINT_CONVERSION_UNCOMPRESSED, p,
                            len) == len;
 }
 
 static int cbs_get_prefixed_point(CBS *cbs, const EC_GROUP *group,
-                                  EC_RAW_POINT *out) {
+                                  EC_AFFINE *out) {
   CBS child;
-  EC_AFFINE affine;
   if (!CBS_get_u16_length_prefixed(cbs, &child) ||
-      !ec_point_from_uncompressed(group, &affine, CBS_data(&child),
+      !ec_point_from_uncompressed(group, out, CBS_data(&child),
                                   CBS_len(&child))) {
     return 0;
   }
-  ec_affine_to_jacobian(group, out, &affine);
   return 1;
 }
 
@@ -148,11 +142,11 @@
 static int pmbtoken_generate_key(const PMBTOKEN_METHOD *method,
                                  CBB *out_private, CBB *out_public) {
   const EC_GROUP *group = method->group;
-  EC_RAW_POINT pub0, pub1, pubs;
+  EC_RAW_POINT pub[3];
   EC_SCALAR x0, y0, x1, y1, xs, ys;
-  if (!generate_keypair(method, &x0, &y0, &pub0) ||
-      !generate_keypair(method, &x1, &y1, &pub1) ||
-      !generate_keypair(method, &xs, &ys, &pubs)) {
+  if (!generate_keypair(method, &x0, &y0, &pub[0]) ||
+      !generate_keypair(method, &x1, &y1, &pub[1]) ||
+      !generate_keypair(method, &xs, &ys, &pub[2])) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_KEYGEN_FAILURE);
     return 0;
   }
@@ -168,15 +162,20 @@
     ec_scalar_to_bytes(group, buf, &scalar_len, scalars[i]);
   }
 
+  EC_AFFINE pub_affine[3];
+  if (!ec_jacobian_to_affine_batch(group, pub_affine, pub, 3)) {
+    return 0;
+  }
+
   // TODO(https://crbug.com/boringssl/331): When updating the key format, remove
   // the redundant length prefixes.
   CBB child;
   if (!CBB_add_u16_length_prefixed(out_public, &child) ||
-      !point_to_cbb(&child, group, &pub0) ||
+      !point_to_cbb(&child, group, &pub_affine[0]) ||
       !CBB_add_u16_length_prefixed(out_public, &child) ||
-      !point_to_cbb(&child, group, &pub1) ||
+      !point_to_cbb(&child, group, &pub_affine[1]) ||
       !CBB_add_u16_length_prefixed(out_public, &child) ||
-      !point_to_cbb(&child, group, &pubs) ||
+      !point_to_cbb(&child, group, &pub_affine[2]) ||
       !CBB_flush(out_public)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_BUFFER_TOO_SMALL);
     return 0;
@@ -222,12 +221,18 @@
   }
 
   // Recompute the public key.
-  if (!mul_twice_base(group, &key->pubs, &key->xs, &method->h, &key->ys) ||
-      !mul_twice_base(group, &key->pub0, &key->x0, &method->h, &key->y0) ||
-      !mul_twice_base(group, &key->pub1, &key->x1, &method->h, &key->y1)) {
+  EC_RAW_POINT pub[3];
+  EC_AFFINE pub_affine[3];
+  if (!mul_twice_base(group, &pub[0], &key->x0, &method->h, &key->y0) ||
+      !mul_twice_base(group, &pub[1], &key->x1, &method->h, &key->y1) ||
+      !mul_twice_base(group, &pub[2], &key->xs, &method->h, &key->ys) ||
+      !ec_jacobian_to_affine_batch(group, pub_affine, pub, 3)) {
     return 0;
   }
 
+  key->pub0 = pub_affine[0];
+  key->pub1 = pub_affine[1];
+  key->pubs = pub_affine[2];
   return 1;
 }
 
@@ -265,9 +270,10 @@
     ec_scalar_from_montgomery(group, &pretoken->r, &pretoken->r);
     ec_scalar_from_montgomery(group, &rinv, &rinv);
 
-    EC_RAW_POINT T;
+    EC_RAW_POINT T, Tp;
     if (!method->hash_t(group, &T, pretoken->t) ||
-        !ec_point_mul_scalar(group, &pretoken->Tp, &T, &rinv)) {
+        !ec_point_mul_scalar(group, &Tp, &T, &rinv) ||
+        !ec_jacobian_to_affine(group, &pretoken->Tp, &Tp)) {
       goto err;
     }
 
@@ -313,9 +319,9 @@
 }
 
 static int hash_c_dleq(const PMBTOKEN_METHOD *method, EC_SCALAR *out,
-                       const EC_RAW_POINT *X, const EC_RAW_POINT *T,
-                       const EC_RAW_POINT *S, const EC_RAW_POINT *W,
-                       const EC_RAW_POINT *K0, const EC_RAW_POINT *K1) {
+                       const EC_AFFINE *X, const EC_AFFINE *T,
+                       const EC_AFFINE *S, const EC_AFFINE *W,
+                       const EC_AFFINE *K0, const EC_AFFINE *K1) {
   static const uint8_t kDLEQ2Label[] = "DLEQ2";
 
   int ok = 0;
@@ -346,11 +352,11 @@
 }
 
 static int hash_c_dleqor(const PMBTOKEN_METHOD *method, EC_SCALAR *out,
-                         const EC_RAW_POINT *X0, const EC_RAW_POINT *X1,
-                         const EC_RAW_POINT *T, const EC_RAW_POINT *S,
-                         const EC_RAW_POINT *W, const EC_RAW_POINT *K00,
-                         const EC_RAW_POINT *K01, const EC_RAW_POINT *K10,
-                         const EC_RAW_POINT *K11) {
+                         const EC_AFFINE *X0, const EC_AFFINE *X1,
+                         const EC_AFFINE *T, const EC_AFFINE *S,
+                         const EC_AFFINE *W, const EC_AFFINE *K00,
+                         const EC_AFFINE *K01, const EC_AFFINE *K10,
+                         const EC_AFFINE *K11) {
   static const uint8_t kDLEQOR2Label[] = "DLEQOR2";
 
   int ok = 0;
@@ -396,16 +402,29 @@
   // We generate a DLEQ proof for the validity token and a DLEQOR2 proof for the
   // private metadata token. To allow amortizing Jacobian-to-affine conversions,
   // we compute Ki for both proofs first.
+  enum {
+    idx_T,
+    idx_S,
+    idx_W,
+    idx_Ws,
+    idx_Ks0,
+    idx_Ks1,
+    idx_Kb0,
+    idx_Kb1,
+    idx_Ko0,
+    idx_Ko1,
+    num_idx,
+  };
+  EC_RAW_POINT jacobians[num_idx];
 
   // Setup the DLEQ proof.
   EC_SCALAR ks0, ks1;
-  EC_RAW_POINT Ks0, Ks1;
   if (// ks0, ks1 <- Zp
       !ec_random_nonzero_scalar(group, &ks0, kDefaultAdditionalData) ||
       !ec_random_nonzero_scalar(group, &ks1, kDefaultAdditionalData) ||
       // Ks = ks0*(G;T) + ks1*(H;S)
-      !mul_twice_base(group, &Ks0, &ks0, &method->h, &ks1) ||
-      !mul_twice(group, &Ks1, T, &ks0, S, &ks1)) {
+      !mul_twice_base(group, &jacobians[idx_Ks0], &ks0, &method->h, &ks1) ||
+      !mul_twice(group, &jacobians[idx_Ks1], T, &ks0, S, &ks1)) {
     return 0;
   }
 
@@ -413,42 +432,54 @@
   // to the private metadata value) and pubo (public key corresponding to the
   // other value) in constant time.
   BN_ULONG mask = ((BN_ULONG)0) - (private_metadata & 1);
+  EC_AFFINE pubo_affine;
   EC_RAW_POINT pubo;
   EC_SCALAR xb, yb;
   ec_scalar_select(group, &xb, mask, &priv->x1, &priv->x0);
   ec_scalar_select(group, &yb, mask, &priv->y1, &priv->y0);
-  ec_point_select(group, &pubo, mask, &priv->pub0, &priv->pub1);
+  ec_affine_select(group, &pubo_affine, mask, &priv->pub0, &priv->pub1);
+  ec_affine_to_jacobian(group, &pubo, &pubo_affine);
 
   EC_SCALAR k0, k1, co, uo, vo;
-  EC_RAW_POINT Kb0, Kb1, Ko0, Ko1;
   if (// k0, k1 <- Zp
       !ec_random_nonzero_scalar(group, &k0, kDefaultAdditionalData) ||
       !ec_random_nonzero_scalar(group, &k1, kDefaultAdditionalData) ||
       // Kb = k0*(G;T) + k1*(H;S)
-      !mul_twice_base(group, &Kb0, &k0, &method->h, &k1) ||
-      !mul_twice(group, &Kb1, T, &k0, S, &k1) ||
+      !mul_twice_base(group, &jacobians[idx_Kb0], &k0, &method->h, &k1) ||
+      !mul_twice(group, &jacobians[idx_Kb1], T, &k0, S, &k1) ||
       // co, uo, vo <- Zp
       !ec_random_nonzero_scalar(group, &co, kDefaultAdditionalData) ||
       !ec_random_nonzero_scalar(group, &uo, kDefaultAdditionalData) ||
       !ec_random_nonzero_scalar(group, &vo, kDefaultAdditionalData) ||
       // Ko = uo*(G;T) + vo*(H;S) - co*(pubo;W)
-      !mul_add_and_sub(group, &Ko0, &Ko1, T, &uo, &method->h, S, &vo, &pubo, W,
-                       &co)) {
+      !mul_add_and_sub(group, &jacobians[idx_Ko0], &jacobians[idx_Ko1], T, &uo,
+                       &method->h, S, &vo, &pubo, W, &co)) {
+    return 0;
+  }
+
+  EC_AFFINE affines[num_idx];
+  jacobians[idx_T] = *T;
+  jacobians[idx_S] = *S;
+  jacobians[idx_W] = *W;
+  jacobians[idx_Ws] = *Ws;
+  if (!ec_jacobian_to_affine_batch(group, affines, jacobians, num_idx)) {
     return 0;
   }
 
   // Select the K corresponding to K0 and K1 in constant-time.
-  EC_RAW_POINT K00, K01, K10, K11;
-  ec_point_select(group, &K00, mask, &Ko0, &Kb0);
-  ec_point_select(group, &K01, mask, &Ko1, &Kb1);
-  ec_point_select(group, &K10, mask, &Kb0, &Ko0);
-  ec_point_select(group, &K11, mask, &Kb1, &Ko1);
+  EC_AFFINE K00, K01, K10, K11;
+  ec_affine_select(group, &K00, mask, &affines[idx_Ko0], &affines[idx_Kb0]);
+  ec_affine_select(group, &K01, mask, &affines[idx_Ko1], &affines[idx_Kb1]);
+  ec_affine_select(group, &K10, mask, &affines[idx_Kb0], &affines[idx_Ko0]);
+  ec_affine_select(group, &K11, mask, &affines[idx_Kb1], &affines[idx_Ko1]);
 
   // Compute c = Hc(...) for the two proofs.
   EC_SCALAR cs, c;
-  if (!hash_c_dleq(method, &cs, &priv->pubs, T, S, Ws, &Ks0, &Ks1) ||
-      !hash_c_dleqor(method, &c, &priv->pub0, &priv->pub1, T, S, W, &K00, &K01,
-                     &K10, &K11)) {
+  if (!hash_c_dleq(method, &cs, &priv->pubs, &affines[idx_T], &affines[idx_S],
+                   &affines[idx_Ws], &affines[idx_Ks0], &affines[idx_Ks1]) ||
+      !hash_c_dleqor(method, &c, &priv->pub0, &priv->pub1, &affines[idx_T],
+                     &affines[idx_S], &affines[idx_W], &K00, &K01, &K10,
+                     &K11)) {
     return 0;
   }
 
@@ -523,6 +554,20 @@
   // We verify a DLEQ proof for the validity token and a DLEQOR2 proof for the
   // private metadata token. To allow amortizing Jacobian-to-affine conversions,
   // we compute Ki for both proofs first.
+  enum {
+    idx_T,
+    idx_S,
+    idx_W,
+    idx_Ws,
+    idx_Ks0,
+    idx_Ks1,
+    idx_K00,
+    idx_K01,
+    idx_K10,
+    idx_K11,
+    num_idx,
+  };
+  EC_RAW_POINT jacobians[num_idx];
 
   // Decode the DLEQ proof.
   EC_SCALAR cs, us, vs;
@@ -534,9 +579,10 @@
   }
 
   // Ks = us*(G;T) + vs*(H;S) - cs*(pubs;Ws)
-  EC_RAW_POINT Ks0, Ks1;
-  if (!mul_add_and_sub(group, &Ks0, &Ks1, T, &us, &method->h, S, &vs,
-                       &pub->pubs, Ws, &cs)) {
+  EC_RAW_POINT pubs;
+  ec_affine_to_jacobian(group, &pubs, &pub->pubs);
+  if (!mul_add_and_sub(group, &jacobians[idx_Ks0], &jacobians[idx_Ks1], T, &us,
+                       &method->h, S, &vs, &pubs, Ws, &cs)) {
     return 0;
   }
 
@@ -552,19 +598,32 @@
     return 0;
   }
 
-  EC_RAW_POINT K00, K01, K10, K11;
+  EC_RAW_POINT pub0, pub1;
+  ec_affine_to_jacobian(group, &pub0, &pub->pub0);
+  ec_affine_to_jacobian(group, &pub1, &pub->pub1);
   if (// K0 = u0*(G;T) + v0*(H;S) - c0*(pub0;W)
-      !mul_add_and_sub(group, &K00, &K01, T, &u0, &method->h, S, &v0,
-                       &pub->pub0, W, &c0) ||
+      !mul_add_and_sub(group, &jacobians[idx_K00], &jacobians[idx_K01], T, &u0,
+                       &method->h, S, &v0, &pub0, W, &c0) ||
       // K1 = u1*(G;T) + v1*(H;S) - c1*(pub1;Ws)
-      !mul_add_and_sub(group, &K10, &K11, T, &u1, &method->h, S, &v1,
-                       &pub->pub1, W, &c1)) {
+      !mul_add_and_sub(group, &jacobians[idx_K10], &jacobians[idx_K11], T, &u1,
+                       &method->h, S, &v1, &pub1, W, &c1)) {
+    return 0;
+  }
+
+  EC_AFFINE affines[num_idx];
+  jacobians[idx_T] = *T;
+  jacobians[idx_S] = *S;
+  jacobians[idx_W] = *W;
+  jacobians[idx_Ws] = *Ws;
+  if (!ec_jacobian_to_affine_batch(group, affines, jacobians, num_idx)) {
     return 0;
   }
 
   // Check the DLEQ proof.
   EC_SCALAR calculated;
-  if (!hash_c_dleq(method, &calculated, &pub->pubs, T, S, Ws, &Ks0, &Ks1)) {
+  if (!hash_c_dleq(method, &calculated, &pub->pubs, &affines[idx_T],
+                   &affines[idx_S], &affines[idx_Ws], &affines[idx_Ks0],
+                   &affines[idx_Ks1])) {
     return 0;
   }
 
@@ -575,8 +634,10 @@
   }
 
   // Check the DLEQOR proof.
-  if (!hash_c_dleqor(method, &calculated, &pub->pub0, &pub->pub1, T, S, W, &K00,
-                     &K01, &K10, &K11)) {
+  if (!hash_c_dleqor(method, &calculated, &pub->pub0, &pub->pub1,
+                     &affines[idx_T], &affines[idx_S], &affines[idx_W],
+                     &affines[idx_K00], &affines[idx_K01], &affines[idx_K10],
+                     &affines[idx_K11])) {
     return 0;
   }
 
@@ -602,11 +663,13 @@
   }
 
   for (size_t i = 0; i < num_to_issue; i++) {
+    EC_AFFINE Tp_affine;
     EC_RAW_POINT Tp;
-    if (!cbs_get_prefixed_point(cbs, group, &Tp)) {
+    if (!cbs_get_prefixed_point(cbs, group, &Tp_affine)) {
       OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
       return 0;
     }
+    ec_affine_to_jacobian(group, &Tp, &Tp_affine);
 
     EC_SCALAR xb, yb;
     BN_ULONG mask = ((BN_ULONG)0) - (private_metadata & 1);
@@ -615,20 +678,30 @@
 
     uint8_t s[PMBTOKEN_NONCE_SIZE];
     RAND_bytes(s, PMBTOKEN_NONCE_SIZE);
-    EC_RAW_POINT Sp, Wp, Wsp;
+    EC_RAW_POINT Sp, W[2];
+    EC_AFFINE W_affine[2];
     CBB child;
-    if (!method->hash_s(group, &Sp, &Tp, s) ||
-        !mul_twice(group, &Wp, &Tp, &xb, &Sp, &yb) ||
-        !mul_twice(group, &Wsp, &Tp, &key->xs, &Sp, &key->ys) ||
+    if (!method->hash_s(group, &Sp, &Tp_affine, s) ||
+        !mul_twice(group, &W[0], &Tp, &xb, &Sp, &yb) ||
+        !mul_twice(group, &W[1], &Tp, &key->xs, &Sp, &key->ys) ||
+        // This call to |ec_jacobian_to_affine_batch| could be merged with the
+        // one in |dleq_generate|, but we expect to implement the batched DLEQOR
+        // proofs (see figure 15 of the PMBTokens paper), which would require a
+        // different interface.
+        //
+        // We similarly pass inputs to |dleq_generate| in Jacobian form, even
+        // though the affine values have already been computed. In the batched
+        // version, these inputs are the result of a multiplication.
+        !ec_jacobian_to_affine_batch(group, W_affine, W, 2) ||
         !CBB_add_bytes(cbb, s, PMBTOKEN_NONCE_SIZE) ||
         // TODO(https://crbug.com/boringssl/331): When updating the key format,
         // remove the redundant length prefixes.
         !CBB_add_u16_length_prefixed(cbb, &child) ||
-        !point_to_cbb(&child, group, &Wp) ||
+        !point_to_cbb(&child, group, &W_affine[0]) ||
         !CBB_add_u16_length_prefixed(cbb, &child) ||
-        !point_to_cbb(&child, group, &Wsp) ||
+        !point_to_cbb(&child, group, &W_affine[1]) ||
         !CBB_add_u16_length_prefixed(cbb, &child) ||
-        !dleq_generate(method, &child, key, &Tp, &Sp, &Wp, &Wsp,
+        !dleq_generate(method, &child, key, &Tp, &Sp, &W[0], &W[1],
                        private_metadata) ||
         !CBB_flush(cbb)) {
       return 0;
@@ -667,19 +740,25 @@
         sk_PMBTOKEN_PRETOKEN_value(pretokens, i);
 
     uint8_t s[PMBTOKEN_NONCE_SIZE];
-    EC_RAW_POINT Wp, Wsp;
+    EC_AFFINE Wp_affine, Wsp_affine;
     CBS proof;
     if (!CBS_copy_bytes(cbs, s, PMBTOKEN_NONCE_SIZE) ||
-        !cbs_get_prefixed_point(cbs, group, &Wp) ||
-        !cbs_get_prefixed_point(cbs, group, &Wsp) ||
+        !cbs_get_prefixed_point(cbs, group, &Wp_affine) ||
+        !cbs_get_prefixed_point(cbs, group, &Wsp_affine) ||
         !CBS_get_u16_length_prefixed(cbs, &proof)) {
       OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
       goto err;
     }
 
-    EC_RAW_POINT Sp;
+    // We pass |Tp| in Jacobian form to |dleq_verify| although the affine form
+    // is already available. This is in anticipation of supporting batched DLEQ
+    // proofs, where the input would be the result of a multiplication.
+    EC_RAW_POINT Tp, Wp, Wsp, Sp;
+    ec_affine_to_jacobian(group, &Tp, &pretoken->Tp);
+    ec_affine_to_jacobian(group, &Wp, &Wp_affine);
+    ec_affine_to_jacobian(group, &Wsp, &Wsp_affine);
     if (!method->hash_s(group, &Sp, &pretoken->Tp, s) ||
-        !dleq_verify(method, &proof, key, &pretoken->Tp, &Sp, &Wp, &Wsp)) {
+        !dleq_verify(method, &proof, key, &Tp, &Sp, &Wp, &Wsp)) {
       goto err;
     }
 
@@ -688,10 +767,13 @@
       goto err;
     }
 
-    EC_RAW_POINT S, W, Ws;
-    if (!ec_point_mul_scalar(group, &S, &Sp, &pretoken->r) ||
-        !ec_point_mul_scalar(group, &W, &Wp, &pretoken->r) ||
-        !ec_point_mul_scalar(group, &Ws, &Wsp, &pretoken->r)) {
+    // Unblind the token.
+    EC_RAW_POINT jacobians[3];
+    EC_AFFINE affines[3];
+    if (!ec_point_mul_scalar(group, &jacobians[0], &Sp, &pretoken->r) ||
+        !ec_point_mul_scalar(group, &jacobians[1], &Wp, &pretoken->r) ||
+        !ec_point_mul_scalar(group, &jacobians[2], &Wsp, &pretoken->r) ||
+        !ec_jacobian_to_affine_batch(group, affines, jacobians, 3)) {
       goto err;
     }
 
@@ -705,11 +787,11 @@
         // TODO(https://crbug.com/boringssl/331): When updating the key format,
         // remove the redundant length prefixes.
         !CBB_add_u16_length_prefixed(&token_cbb, &child) ||
-        !point_to_cbb(&child, group, &S) ||
+        !point_to_cbb(&child, group, &affines[0]) ||
         !CBB_add_u16_length_prefixed(&token_cbb, &child) ||
-        !point_to_cbb(&child, group, &W) ||
+        !point_to_cbb(&child, group, &affines[1]) ||
         !CBB_add_u16_length_prefixed(&token_cbb, &child) ||
-        !point_to_cbb(&child, group, &Ws) ||
+        !point_to_cbb(&child, group, &affines[2]) ||
         !CBB_flush(&token_cbb)) {
       CBB_cleanup(&token_cbb);
       goto err;
@@ -741,7 +823,7 @@
   const EC_GROUP *group = method->group;
   CBS cbs;
   CBS_init(&cbs, token, token_len);
-  EC_RAW_POINT S, W, Ws;
+  EC_AFFINE S, W, Ws;
   if (!CBS_copy_bytes(&cbs, out_nonce, PMBTOKEN_NONCE_SIZE) ||
       !cbs_get_prefixed_point(&cbs, group, &S) ||
       !cbs_get_prefixed_point(&cbs, group, &W) ||
@@ -757,22 +839,23 @@
     return 0;
   }
 
-  EC_RAW_POINT calculated;
+  EC_RAW_POINT S_jacobian, calculated;
   // Check the validity of the token.
-  if (!mul_twice(group, &calculated, &T, &key->xs, &S, &key->ys) ||
-      !ec_GFp_simple_points_equal(group, &calculated, &Ws)) {
+  ec_affine_to_jacobian(group, &S_jacobian, &S);
+  if (!mul_twice(group, &calculated, &T, &key->xs, &S_jacobian, &key->ys) ||
+      !ec_affine_jacobian_equal(group, &Ws, &calculated)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_BAD_VALIDITY_CHECK);
     return 0;
   }
 
   EC_RAW_POINT W0, W1;
-  if (!mul_twice(group, &W0, &T, &key->x0, &S, &key->y0) ||
-      !mul_twice(group, &W1, &T, &key->x1, &S, &key->y1)) {
+  if (!mul_twice(group, &W0, &T, &key->x0, &S_jacobian, &key->y0) ||
+      !mul_twice(group, &W1, &T, &key->x1, &S_jacobian, &key->y1)) {
     return 0;
   }
 
-  const int is_W0 = ec_GFp_simple_points_equal(group, &W0, &W);
-  const int is_W1 = ec_GFp_simple_points_equal(group, &W1, &W);
+  const int is_W0 = ec_affine_jacobian_equal(group, &W, &W0);
+  const int is_W1 = ec_affine_jacobian_equal(group, &W, &W1);
   const int is_valid = is_W0 ^ is_W1;
   if (!is_valid) {
     // Invalid tokens will fail the validity check above.
@@ -795,14 +878,15 @@
 }
 
 static int pmbtoken_exp0_hash_s(const EC_GROUP *group, EC_RAW_POINT *out,
-                                const EC_RAW_POINT *t,
+                                const EC_AFFINE *t,
                                 const uint8_t s[PMBTOKEN_NONCE_SIZE]) {
   const uint8_t kHashSLabel[] = "PMBTokensV0 HashS";
   int ret = 0;
   CBB cbb;
   uint8_t *buf = NULL;
   size_t len;
-  if (!CBB_init(&cbb, 0) || !point_to_cbb(&cbb, group, t) ||
+  if (!CBB_init(&cbb, 0) ||
+      !point_to_cbb(&cbb, group, t) ||
       !CBB_add_bytes(&cbb, s, PMBTOKEN_NONCE_SIZE) ||
       !CBB_finish(&cbb, &buf, &len) ||
       !ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
@@ -994,7 +1078,7 @@
 }
 
 static int pmbtoken_exp1_hash_s(const EC_GROUP *group, EC_RAW_POINT *out,
-                                const EC_RAW_POINT *t,
+                                const EC_AFFINE *t,
                                 const uint8_t s[PMBTOKEN_NONCE_SIZE]) {
   const uint8_t kHashSLabel[] = "PMBTokens Experiment V1 HashS";
   int ret = 0;