Enable Text format parser to skip unknown short-formed repeated fields.
The text format specs state that short formed repeated field should be valid. For example:
```
repeated_field: [{}, {}]
```
However, this was not supported when text format parser tries to skip **unknown fields**.
PiperOrigin-RevId: 493085702
diff --git a/java/core/src/main/java/com/google/protobuf/TextFormat.java b/java/core/src/main/java/com/google/protobuf/TextFormat.java
index 65e403e..8c3ac5c 100644
--- a/java/core/src/main/java/com/google/protobuf/TextFormat.java
+++ b/java/core/src/main/java/com/google/protobuf/TextFormat.java
@@ -1893,18 +1893,8 @@
// Skips unknown fields.
if (field == null) {
- // Try to guess the type of this field.
- // If this field is not a message, there should be a ":" between the
- // field name and the field value and also the field value should not
- // start with "{" or "<" which indicates the beginning of a message body.
- // If there is no ":" or there is a "{" or "<" after ":", this field has
- // to be a message or the input is ill-formed.
detectSilentMarker(tokenizer, type, name);
- if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
- skipFieldValue(tokenizer);
- } else {
- skipFieldMessage(tokenizer, type);
- }
+ guessFieldTypeAndSkip(tokenizer, type);
return;
}
@@ -2263,19 +2253,9 @@
/** Skips the next field including the field's name and value. */
private void skipField(Tokenizer tokenizer, Descriptor type) throws ParseException {
String name = consumeFullTypeName(tokenizer);
-
- // Try to guess the type of this field.
- // If this field is not a message, there should be a ":" between the
- // field name and the field value and also the field value should not
- // start with "{" or "<" which indicates the beginning of a message body.
- // If there is no ":" or there is a "{" or "<" after ":", this field has
- // to be a message or the input is ill-formed.
detectSilentMarker(tokenizer, type, name);
- if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) {
- skipFieldValue(tokenizer);
- } else {
- skipFieldMessage(tokenizer, type);
- }
+ guessFieldTypeAndSkip(tokenizer, type);
+
// For historical reasons, fields may optionally be separated by commas or
// semicolons.
if (!tokenizer.tryConsume(";")) {
@@ -2314,6 +2294,58 @@
throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken);
}
}
+
+ /**
+ * Tries to guess the type of this field and skip it.
+ *
+ * <p>If this field is not a message, there should be a ":" between the field name and the field
+ * value and also the field value should not start with "{" or "<" which indicates the beginning
+ * of a message body. If there is no ":" or there is a "{" or "<" after ":", this field has to
+ * be a message or the input is ill-formed. For short-formed repeated fields (i.e. with "[]"),
+ * if it is repeated scalar, there must be a ":" between the field name and the starting "[" .
+ */
+ private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type) throws ParseException {
+ boolean semicolonConsumed = tokenizer.tryConsume(":");
+ if (tokenizer.lookingAt("[")) {
+ // Short repeated field form. If a semicolon was consumed, it could be repeated scalar or
+ // repeated message. If not, it must be repeated message.
+ skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type);
+ } else if (semicolonConsumed && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
+ skipFieldValue(tokenizer);
+ } else {
+ skipFieldMessage(tokenizer, type);
+ }
+ }
+
+ /**
+ * Skips a short-formed repeated field value.
+ *
+ * <p>Reports an error if scalar type is not allowed but showing up inside "[]".
+ */
+ private void skipFieldShortFormedRepeated(
+ Tokenizer tokenizer, boolean scalarAllowed, Descriptor type) throws ParseException {
+ if (!tokenizer.tryConsume("[") || tokenizer.tryConsume("]")) {
+ // Try skipping "[]".
+ return;
+ }
+
+ while (true) {
+ if (tokenizer.lookingAt("{") || tokenizer.lookingAt("<")) {
+ // Try skipping message field inside "[]"
+ skipFieldMessage(tokenizer, type);
+ } else if (scalarAllowed) {
+ // Try skipping scalar field inside "[]".
+ skipFieldValue(tokenizer);
+ } else {
+ throw tokenizer.parseException(
+ "Invalid repeated scalar field: missing \":\" before \"[\".");
+ }
+ if (tokenizer.tryConsume("]")) {
+ break;
+ }
+ tokenizer.consume(",");
+ }
+ }
}
// =================================================================