Use new encoding functions in ASN1_mbstring_ncopy.
Update-Note: This changes causes BoringSSL to be stricter about handling
Unicode strings:
· Reject code points outside of Unicode
· Reject surrogate values
· Don't allow invalid UTF-8 to pass through when the source claims to
be UTF-8 already.
· Drop byte-order marks.
Previously, for example, a UniversalString could contain a large-valued
code point that would cause the UTF-8 encoder to emit invalid UTF-8.
Change-Id: I94d9db7796b70491b04494be84249907ff8fb46c
Reviewed-on: https://boringssl-review.googlesource.com/28325
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c
index a2789ed..1bbcd1b 100644
--- a/crypto/asn1/a_mbstr.c
+++ b/crypto/asn1/a_mbstr.c
@@ -56,23 +56,16 @@
#include <openssl/asn1.h>
+#include <limits.h>
#include <string.h>
+#include <openssl/bytestring.h>
#include <openssl/err.h>
#include <openssl/mem.h>
#include "asn1_locl.h"
+#include "../bytestring/internal.h"
-static int traverse_string(const unsigned char *p, int len, int inform,
- int (*rfunc) (uint32_t value, void *in),
- void *arg);
-static int in_utf8(uint32_t value, void *arg);
-static int out_utf8(uint32_t value, void *arg);
-static int type_str(uint32_t value, void *arg);
-static int cpy_asc(uint32_t value, void *arg);
-static int cpy_bmp(uint32_t value, void *arg);
-static int cpy_univ(uint32_t value, void *arg);
-static int cpy_utf8(uint32_t value, void *arg);
static int is_printable(uint32_t value);
/*
@@ -90,55 +83,45 @@
return ASN1_mbstring_ncopy(out, in, len, inform, mask, 0, 0);
}
+OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_BMPSTRING)
+OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_UNIVERSALSTRING)
+OPENSSL_DECLARE_ERROR_REASON(ASN1, INVALID_UTF8STRING)
+
int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len,
int inform, unsigned long mask,
long minsize, long maxsize)
{
int str_type;
- int ret;
char free_out;
- int outform, outlen = 0;
ASN1_STRING *dest;
- unsigned char *p;
- int nchar;
+ size_t nchar = 0;
char strbuf[32];
- int (*cpyfunc) (uint32_t, void *) = NULL;
if (len == -1)
len = strlen((const char *)in);
if (!mask)
mask = DIRSTRING_TYPE;
- /* First do a string check and work out the number of characters */
+ int (*decode_func)(CBS *, uint32_t*);
+ int error;
switch (inform) {
-
case MBSTRING_BMP:
- if (len & 1) {
- OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING_LENGTH);
- return -1;
- }
- nchar = len >> 1;
+ decode_func = cbs_get_ucs2_be;
+ error = ASN1_R_INVALID_BMPSTRING;
break;
case MBSTRING_UNIV:
- if (len & 3) {
- OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING_LENGTH);
- return -1;
- }
- nchar = len >> 2;
+ decode_func = cbs_get_utf32_be;
+ error = ASN1_R_INVALID_UNIVERSALSTRING;
break;
case MBSTRING_UTF8:
- nchar = 0;
- /* This counts the characters and does utf8 syntax checking */
- ret = traverse_string(in, len, MBSTRING_UTF8, in_utf8, &nchar);
- if (ret < 0) {
- OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UTF8STRING);
- return -1;
- }
+ decode_func = cbs_get_utf8;
+ error = ASN1_R_INVALID_UTF8STRING;
break;
case MBSTRING_ASC:
- nchar = len;
+ decode_func = cbs_get_latin1;
+ error = ERR_R_INTERNAL_ERROR; // Latin-1 inputs are never invalid.
break;
default:
@@ -146,44 +129,92 @@
return -1;
}
- if ((minsize > 0) && (nchar < minsize)) {
+ /* Check |minsize| and |maxsize| and work out the minimal type, if any. */
+ CBS cbs;
+ CBS_init(&cbs, in, len);
+ size_t utf8_len = 0;
+ while (CBS_len(&cbs) != 0) {
+ uint32_t c;
+ if (!decode_func(&cbs, &c)) {
+ OPENSSL_PUT_ERROR(ASN1, error);
+ return -1;
+ }
+ if (nchar == 0 &&
+ (inform == MBSTRING_BMP || inform == MBSTRING_UNIV) &&
+ c == 0xfeff) {
+ /* Reject byte-order mark. We could drop it but that would mean
+ * adding ambiguity around whether a BOM was included or not when
+ * matching strings.
+ *
+ * For a little-endian UCS-2 string, the BOM will appear as 0xfffe
+ * and will be rejected as noncharacter, below. */
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
+ return -1;
+ }
+
+ /* Update which output formats are still possible. */
+ if ((mask & B_ASN1_PRINTABLESTRING) && !is_printable(c)) {
+ mask &= ~B_ASN1_PRINTABLESTRING;
+ }
+ if ((mask & B_ASN1_IA5STRING) && (c > 127)) {
+ mask &= ~B_ASN1_IA5STRING;
+ }
+ if ((mask & B_ASN1_T61STRING) && (c > 0xff)) {
+ mask &= ~B_ASN1_T61STRING;
+ }
+ if ((mask & B_ASN1_BMPSTRING) && (c > 0xffff)) {
+ mask &= ~B_ASN1_BMPSTRING;
+ }
+ if (!mask) {
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
+ return -1;
+ }
+
+ nchar++;
+ utf8_len += cbb_get_utf8_len(c);
+ }
+
+ if (minsize > 0 && nchar < (size_t)minsize) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_STRING_TOO_SHORT);
BIO_snprintf(strbuf, sizeof strbuf, "%ld", minsize);
ERR_add_error_data(2, "minsize=", strbuf);
return -1;
}
- if ((maxsize > 0) && (nchar > maxsize)) {
+ if (maxsize > 0 && nchar > (size_t)maxsize) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_STRING_TOO_LONG);
BIO_snprintf(strbuf, sizeof strbuf, "%ld", maxsize);
ERR_add_error_data(2, "maxsize=", strbuf);
return -1;
}
- /* Now work out minimal type (if any) */
- if (traverse_string(in, len, inform, type_str, &mask) < 0) {
- OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
- return -1;
- }
-
/* Now work out output format and string type */
- outform = MBSTRING_ASC;
- if (mask & B_ASN1_PRINTABLESTRING)
+ int (*encode_func)(CBB *, uint32_t) = cbb_add_latin1;
+ size_t size_estimate = nchar;
+ int outform = MBSTRING_ASC;
+ if (mask & B_ASN1_PRINTABLESTRING) {
str_type = V_ASN1_PRINTABLESTRING;
- else if (mask & B_ASN1_IA5STRING)
+ } else if (mask & B_ASN1_IA5STRING) {
str_type = V_ASN1_IA5STRING;
- else if (mask & B_ASN1_T61STRING)
+ } else if (mask & B_ASN1_T61STRING) {
str_type = V_ASN1_T61STRING;
- else if (mask & B_ASN1_BMPSTRING) {
+ } else if (mask & B_ASN1_BMPSTRING) {
str_type = V_ASN1_BMPSTRING;
outform = MBSTRING_BMP;
+ encode_func = cbb_add_ucs2_be;
+ size_estimate = 2 * nchar;
} else if (mask & B_ASN1_UNIVERSALSTRING) {
str_type = V_ASN1_UNIVERSALSTRING;
+ encode_func = cbb_add_utf32_be;
+ size_estimate = 4 * nchar;
outform = MBSTRING_UNIV;
} else {
str_type = V_ASN1_UTF8STRING;
outform = MBSTRING_UTF8;
+ encode_func = cbb_add_utf8;
+ size_estimate = utf8_len;
}
+
if (!out)
return str_type;
if (*out) {
@@ -204,6 +235,7 @@
}
*out = dest;
}
+
/* If both the same type just copy across */
if (inform == outform) {
if (!ASN1_STRING_set(dest, in, len)) {
@@ -213,179 +245,41 @@
return str_type;
}
- /* Work out how much space the destination will need */
- switch (outform) {
- case MBSTRING_ASC:
- outlen = nchar;
- cpyfunc = cpy_asc;
- break;
-
- case MBSTRING_BMP:
- outlen = nchar << 1;
- cpyfunc = cpy_bmp;
- break;
-
- case MBSTRING_UNIV:
- outlen = nchar << 2;
- cpyfunc = cpy_univ;
- break;
-
- case MBSTRING_UTF8:
- outlen = 0;
- traverse_string(in, len, inform, out_utf8, &outlen);
- cpyfunc = cpy_utf8;
- break;
- }
- if (!(p = OPENSSL_malloc(outlen + 1))) {
- if (free_out)
- ASN1_STRING_free(dest);
+ CBB cbb;
+ if (!CBB_init(&cbb, size_estimate + 1)) {
OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
- return -1;
+ goto err;
}
- dest->length = outlen;
- dest->data = p;
- p[outlen] = 0;
- traverse_string(in, len, inform, cpyfunc, &p);
+ CBS_init(&cbs, in, len);
+ while (CBS_len(&cbs) != 0) {
+ uint32_t c;
+ if (!decode_func(&cbs, &c) ||
+ !encode_func(&cbb, c)) {
+ OPENSSL_PUT_ERROR(ASN1, ERR_R_INTERNAL_ERROR);
+ goto err;
+ }
+ }
+ uint8_t *data = NULL;
+ size_t data_len;
+ if (/* OpenSSL historically NUL-terminated this value with a single byte,
+ * even for |MBSTRING_BMP| and |MBSTRING_UNIV|. */
+ !CBB_add_u8(&cbb, 0) ||
+ !CBB_finish(&cbb, &data, &data_len) ||
+ data_len < 1 ||
+ data_len > INT_MAX) {
+ OPENSSL_PUT_ERROR(ASN1, ERR_R_INTERNAL_ERROR);
+ OPENSSL_free(data);
+ goto err;
+ }
+ dest->length = (int)(data_len - 1);
+ dest->data = data;
return str_type;
-}
-/*
- * This function traverses a string and passes the value of each character to
- * an optional function along with a void * argument.
- */
-
-static int traverse_string(const unsigned char *p, int len, int inform,
- int (*rfunc) (uint32_t value, void *in),
- void *arg)
-{
- uint32_t value;
- int ret;
- while (len) {
- if (inform == MBSTRING_ASC) {
- value = *p++;
- len--;
- } else if (inform == MBSTRING_BMP) {
- value = *p++ << 8;
- value |= *p++;
- len -= 2;
- } else if (inform == MBSTRING_UNIV) {
- value = ((uint32_t)*p++) << 24;
- value |= ((uint32_t)*p++) << 16;
- value |= *p++ << 8;
- value |= *p++;
- len -= 4;
- } else {
- ret = UTF8_getc(p, len, &value);
- if (ret < 0)
- return -1;
- len -= ret;
- p += ret;
- }
- if (rfunc) {
- ret = rfunc(value, arg);
- if (ret <= 0)
- return ret;
- }
- }
- return 1;
-}
-
-/* Various utility functions for traverse_string */
-
-/* Just count number of characters */
-
-static int in_utf8(uint32_t value, void *arg)
-{
- int *nchar;
- nchar = arg;
- (*nchar)++;
- return 1;
-}
-
-/* Determine size of output as a UTF8 String */
-
-static int out_utf8(uint32_t value, void *arg)
-{
- int *outlen;
- outlen = arg;
- *outlen += UTF8_putc(NULL, -1, value);
- return 1;
-}
-
-/*
- * Determine the "type" of a string: check each character against a supplied
- * "mask".
- */
-
-static int type_str(uint32_t value, void *arg)
-{
- unsigned long types;
- types = *((unsigned long *)arg);
- if ((types & B_ASN1_PRINTABLESTRING) && !is_printable(value))
- types &= ~B_ASN1_PRINTABLESTRING;
- if ((types & B_ASN1_IA5STRING) && (value > 127))
- types &= ~B_ASN1_IA5STRING;
- if ((types & B_ASN1_T61STRING) && (value > 0xff))
- types &= ~B_ASN1_T61STRING;
- if ((types & B_ASN1_BMPSTRING) && (value > 0xffff))
- types &= ~B_ASN1_BMPSTRING;
- if (!types)
- return -1;
- *((unsigned long *)arg) = types;
- return 1;
-}
-
-/* Copy one byte per character ASCII like strings */
-
-static int cpy_asc(uint32_t value, void *arg)
-{
- unsigned char **p, *q;
- p = arg;
- q = *p;
- *q = (unsigned char)value;
- (*p)++;
- return 1;
-}
-
-/* Copy two byte per character BMPStrings */
-
-static int cpy_bmp(uint32_t value, void *arg)
-{
- unsigned char **p, *q;
- p = arg;
- q = *p;
- *q++ = (unsigned char)((value >> 8) & 0xff);
- *q = (unsigned char)(value & 0xff);
- *p += 2;
- return 1;
-}
-
-/* Copy four byte per character UniversalStrings */
-
-static int cpy_univ(uint32_t value, void *arg)
-{
- unsigned char **p, *q;
- p = arg;
- q = *p;
- *q++ = (unsigned char)((value >> 24) & 0xff);
- *q++ = (unsigned char)((value >> 16) & 0xff);
- *q++ = (unsigned char)((value >> 8) & 0xff);
- *q = (unsigned char)(value & 0xff);
- *p += 4;
- return 1;
-}
-
-/* Copy to a UTF8String */
-
-static int cpy_utf8(uint32_t value, void *arg)
-{
- unsigned char **p;
- int ret;
- p = arg;
- /* We already know there is enough room so pass 0xff as the length */
- ret = UTF8_putc(*p, 0xff, value);
- *p += ret;
- return 1;
+ err:
+ if (free_out)
+ ASN1_STRING_free(dest);
+ CBB_cleanup(&cbb);
+ return -1;
}
/* Return 1 if the character is permitted in a PrintableString */
diff --git a/crypto/err/asn1.errordata b/crypto/err/asn1.errordata
index 56cbbe52..271561b 100644
--- a/crypto/err/asn1.errordata
+++ b/crypto/err/asn1.errordata
@@ -40,14 +40,14 @@
ASN1,139,INTEGER_NOT_ASCII_FORMAT
ASN1,140,INTEGER_TOO_LARGE_FOR_LONG
ASN1,141,INVALID_BIT_STRING_BITS_LEFT
-ASN1,142,INVALID_BMPSTRING_LENGTH
+ASN1,142,INVALID_BMPSTRING
ASN1,143,INVALID_DIGIT
ASN1,144,INVALID_MODIFIER
ASN1,145,INVALID_NUMBER
ASN1,146,INVALID_OBJECT_ENCODING
ASN1,147,INVALID_SEPARATOR
ASN1,148,INVALID_TIME_FORMAT
-ASN1,149,INVALID_UNIVERSALSTRING_LENGTH
+ASN1,149,INVALID_UNIVERSALSTRING
ASN1,150,INVALID_UTF8STRING
ASN1,151,LIST_ERROR
ASN1,152,MISSING_ASN1_EOS
diff --git a/crypto/test/test_util.cc b/crypto/test/test_util.cc
index 493b124..4ad777f 100644
--- a/crypto/test/test_util.cc
+++ b/crypto/test/test_util.cc
@@ -30,6 +30,10 @@
}
std::ostream &operator<<(std::ostream &os, const Bytes &in) {
+ if (in.len == 0) {
+ return os << "<empty Bytes>";
+ }
+
// Print a byte slice as hex.
static const char hex[] = "0123456789abcdef";
for (size_t i = 0; i < in.len; i++) {
diff --git a/crypto/x509/a_strex.c b/crypto/x509/a_strex.c
index c0c346d..6dc183a 100644
--- a/crypto/x509/a_strex.c
+++ b/crypto/x509/a_strex.c
@@ -189,13 +189,13 @@
switch (charwidth) {
case 4:
if (buflen & 3) {
- OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING_LENGTH);
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING);
return -1;
}
break;
case 2:
if (buflen & 1) {
- OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING_LENGTH);
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING);
return -1;
}
break;
diff --git a/crypto/x509/x509_test.cc b/crypto/x509/x509_test.cc
index 7615324..9c99aee 100644
--- a/crypto/x509/x509_test.cc
+++ b/crypto/x509/x509_test.cc
@@ -20,8 +20,8 @@
#include <openssl/bio.h>
#include <openssl/bytestring.h>
-#include <openssl/curve25519.h>
#include <openssl/crypto.h>
+#include <openssl/curve25519.h>
#include <openssl/digest.h>
#include <openssl/err.h>
#include <openssl/pem.h>
@@ -30,6 +30,7 @@
#include <openssl/x509v3.h>
#include "../internal.h"
+#include "../test/test_util.h"
std::string GetTestData(const char *path);
@@ -1283,3 +1284,62 @@
EXPECT_EQ(X509_NAME_ENTRY_set(X509_NAME_get_entry(name.get(), 1)), 1);
EXPECT_EQ(X509_NAME_ENTRY_set(X509_NAME_get_entry(name.get(), 2)), 2);
}
+
+TEST(X509Test, StringDecoding) {
+ static const struct {
+ std::vector<uint8_t> in;
+ int type;
+ const char *expected;
+ } kTests[] = {
+ // Non-minimal, two-byte UTF-8.
+ {{0xc0, 0x81}, V_ASN1_UTF8STRING, nullptr},
+ // Non-minimal, three-byte UTF-8.
+ {{0xe0, 0x80, 0x81}, V_ASN1_UTF8STRING, nullptr},
+ // Non-minimal, four-byte UTF-8.
+ {{0xf0, 0x80, 0x80, 0x81}, V_ASN1_UTF8STRING, nullptr},
+ // Truncated, four-byte UTF-8.
+ {{0xf0, 0x80, 0x80}, V_ASN1_UTF8STRING, nullptr},
+ // Low-surrogate value.
+ {{0xed, 0xa0, 0x80}, V_ASN1_UTF8STRING, nullptr},
+ // High-surrogate value.
+ {{0xed, 0xb0, 0x81}, V_ASN1_UTF8STRING, nullptr},
+ // Initial BOMs should be rejected from UCS-2 and UCS-4.
+ {{0xfe, 0xff, 0, 88}, V_ASN1_BMPSTRING, nullptr},
+ {{0, 0, 0xfe, 0xff, 0, 0, 0, 88}, V_ASN1_UNIVERSALSTRING, nullptr},
+ // Otherwise, BOMs should pass through.
+ {{0, 88, 0xfe, 0xff}, V_ASN1_BMPSTRING, "X\xef\xbb\xbf"},
+ {{0, 0, 0, 88, 0, 0, 0xfe, 0xff}, V_ASN1_UNIVERSALSTRING,
+ "X\xef\xbb\xbf"},
+ // The maximum code-point should pass though.
+ {{0, 16, 0xff, 0xfd}, V_ASN1_UNIVERSALSTRING, "\xf4\x8f\xbf\xbd"},
+ // Values outside the Unicode space should not.
+ {{0, 17, 0, 0}, V_ASN1_UNIVERSALSTRING, nullptr},
+ // Non-characters should be rejected.
+ {{0, 1, 0xff, 0xff}, V_ASN1_UNIVERSALSTRING, nullptr},
+ {{0, 1, 0xff, 0xfe}, V_ASN1_UNIVERSALSTRING, nullptr},
+ {{0, 0, 0xfd, 0xd5}, V_ASN1_UNIVERSALSTRING, nullptr},
+ // BMPString is UCS-2, not UTF-16, so surrogate pairs are invalid.
+ {{0xd8, 0, 0xdc, 1}, V_ASN1_BMPSTRING, nullptr},
+ };
+
+ for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kTests); i++) {
+ SCOPED_TRACE(i);
+ const auto& test = kTests[i];
+ ASN1_STRING s;
+ s.type = test.type;
+ s.data = const_cast<uint8_t*>(test.in.data());
+ s.length = test.in.size();
+
+ uint8_t *utf8;
+ const int utf8_len = ASN1_STRING_to_UTF8(&utf8, &s);
+ EXPECT_EQ(utf8_len < 0, test.expected == nullptr);
+ if (utf8_len >= 0) {
+ if (test.expected != nullptr) {
+ EXPECT_EQ(Bytes(test.expected), Bytes(utf8, utf8_len));
+ }
+ OPENSSL_free(utf8);
+ } else {
+ ERR_clear_error();
+ }
+ }
+}
diff --git a/include/openssl/asn1.h b/include/openssl/asn1.h
index f2e92a7..f7b6b86 100644
--- a/include/openssl/asn1.h
+++ b/include/openssl/asn1.h
@@ -152,6 +152,9 @@
/* For use with ASN1_mbstring_copy() */
#define MBSTRING_FLAG 0x1000
#define MBSTRING_UTF8 (MBSTRING_FLAG)
+/* |MBSTRING_ASC| refers to Latin-1, not ASCII. It is used with TeletexString
+ * which, in turn, is treated as Latin-1 rather than T.61 by OpenSSL and most
+ * other software. */
#define MBSTRING_ASC (MBSTRING_FLAG|1)
#define MBSTRING_BMP (MBSTRING_FLAG|2)
#define MBSTRING_UNIV (MBSTRING_FLAG|4)
@@ -926,14 +929,14 @@
#define ASN1_R_INTEGER_NOT_ASCII_FORMAT 139
#define ASN1_R_INTEGER_TOO_LARGE_FOR_LONG 140
#define ASN1_R_INVALID_BIT_STRING_BITS_LEFT 141
-#define ASN1_R_INVALID_BMPSTRING_LENGTH 142
+#define ASN1_R_INVALID_BMPSTRING 142
#define ASN1_R_INVALID_DIGIT 143
#define ASN1_R_INVALID_MODIFIER 144
#define ASN1_R_INVALID_NUMBER 145
#define ASN1_R_INVALID_OBJECT_ENCODING 146
#define ASN1_R_INVALID_SEPARATOR 147
#define ASN1_R_INVALID_TIME_FORMAT 148
-#define ASN1_R_INVALID_UNIVERSALSTRING_LENGTH 149
+#define ASN1_R_INVALID_UNIVERSALSTRING 149
#define ASN1_R_INVALID_UTF8STRING 150
#define ASN1_R_LIST_ERROR 151
#define ASN1_R_MISSING_ASN1_EOS 152