Fix a parser bug where tokens are misidentified as commas. (#1502) * Fix a parser bug where tokens are misidentified as commas. In the old and new readers, when parsing an object, a comment followed by any non-`}` token is treated as a comma. The new unit test required changing the runjsontests.py flag regime so that failure tests could be run with default settings. * Honor allowComments==false mode. Much of the comment handling in the parsers is bespoke, and does not honor this flag. By unfiying it under a common API, the parser is simplified and strict mode is now more correctly strict. Note that allowComments mode does not allow for comments in arbitrary locations; they are allowed only in certain positions. Rectifying this is a bigger effort, since collectComments mode requires storing the comments somewhere, and it's not immediately clear where in the DOM all such comments should live. --------- Co-authored-by: Jordan Bayles <bayles.jordan@gmail.com>

commit: 0a9b9d9c6ea04847af83c32930176ec42ba6c842 [log] [tgz]
author: vslashg <veloso@verylowsodium.com> Mon Sep 09 20:30:16 2024 -0400
committer: GitHub <noreply@github.com> Mon Sep 09 17:30:16 2024 -0700
tree: 269d64ab6de008c6ad971563dacc94149a047333
parent: c3a986600f9975a33e3573d85b48db63011d3711 [diff]
diff --git a/include/json/reader.h b/include/json/reader.h
index 10c6a4f..85539d1 100644
--- a/include/json/reader.h
+++ b/include/json/reader.h

@@ -190,6 +190,7 @@
   using Errors = std::deque<ErrorInfo>;
 
   bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
   void skipSpaces();
   bool match(const Char* pattern, int patternLength);
   bool readComment();
@@ -221,7 +222,6 @@
                                 int& column) const;
   String getLocationLineAndColumn(Location location) const;
   void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);
 
   static bool containsNewLine(Location begin, Location end);
   static String normalizeEOL(Location begin, Location end);

diff --git a/src/jsontestrunner/main.cpp b/src/jsontestrunner/main.cpp
index df717ff..ab6a800 100644
--- a/src/jsontestrunner/main.cpp
+++ b/src/jsontestrunner/main.cpp

@@ -240,11 +240,14 @@
     return printUsage(argv);
   }
   int index = 1;
-  if (Json::String(argv[index]) == "--json-checker") {
-    opts->features = Json::Features::strictMode();
+  if (Json::String(argv[index]) == "--parse-only") {
     opts->parseOnly = true;
     ++index;
   }
+  if (Json::String(argv[index]) == "--strict") {
+    opts->features = Json::Features::strictMode();
+    ++index;
+  }
   if (Json::String(argv[index]) == "--json-config") {
     printConfig();
     return 3;

diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp
index 8dcd2b5..b12c6b8 100644
--- a/src/lib_json/json_reader.cpp
+++ b/src/lib_json/json_reader.cpp

@@ -129,7 +129,7 @@
 
   bool successful = readValue();
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   if (collectComments_ && !commentsBefore_.empty())
     root.setComment(commentsBefore_, commentAfter);
   if (features_.strictRoot_) {
@@ -157,7 +157,7 @@
     throwRuntimeError("Exceeded stackLimit in readValue().");
 
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   bool successful = true;
 
   if (collectComments_ && !commentsBefore_.empty()) {
@@ -225,14 +225,14 @@
   return successful;
 }
 
-void Reader::skipCommentTokens(Token& token) {
+bool Reader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
   if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
   }
+  return success;
 }
 
 bool Reader::readToken(Token& token) {
@@ -446,12 +446,7 @@
   Value init(objectValue);
   currentValue().swapPayload(init);
   currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
       return true;
     name.clear();
@@ -480,15 +475,11 @@
       return recoverFromError(tokenObjectEnd);
 
     Token comma;
-    if (!readToken(comma) ||
-        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
-         comma.type_ != tokenComment)) {
+    if (!readTokenSkippingComments(comma) ||
+        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
       return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                 comma, tokenObjectEnd);
     }
-    bool finalizeTokenOk = true;
-    while (comma.type_ == tokenComment && finalizeTokenOk)
-      finalizeTokenOk = readToken(comma);
     if (comma.type_ == tokenObjectEnd)
       return true;
   }
@@ -518,10 +509,7 @@
 
     Token currentToken;
     // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                          currentToken.type_ != tokenArrayEnd);
     if (!ok || badTokenType) {
@@ -943,6 +931,7 @@
   using Errors = std::deque<ErrorInfo>;
 
   bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
   void skipSpaces();
   void skipBom(bool skipBom);
   bool match(const Char* pattern, int patternLength);
@@ -976,7 +965,6 @@
                                 int& column) const;
   String getLocationLineAndColumn(Location location) const;
   void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);
 
   static String normalizeEOL(Location begin, Location end);
   static bool containsNewLine(Location begin, Location end);
@@ -1030,7 +1018,7 @@
   bool successful = readValue();
   nodes_.pop();
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
     addError("Extra non-whitespace after JSON value.", token);
     return false;
@@ -1058,7 +1046,7 @@
   if (nodes_.size() > features_.stackLimit_)
     throwRuntimeError("Exceeded stackLimit in readValue().");
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   bool successful = true;
 
   if (collectComments_ && !commentsBefore_.empty()) {
@@ -1145,14 +1133,14 @@
   return successful;
 }
 
-void OurReader::skipCommentTokens(Token& token) {
+bool OurReader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
   if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
   }
+  return success;
 }
 
 bool OurReader::readToken(Token& token) {
@@ -1449,12 +1437,7 @@
   Value init(objectValue);
   currentValue().swapPayload(init);
   currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
     if (tokenName.type_ == tokenObjectEnd &&
         (name.empty() ||
          features_.allowTrailingCommas_)) // empty object or trailing comma
@@ -1491,15 +1474,11 @@
       return recoverFromError(tokenObjectEnd);
 
     Token comma;
-    if (!readToken(comma) ||
-        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
-         comma.type_ != tokenComment)) {
+    if (!readTokenSkippingComments(comma) ||
+        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
       return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                 comma, tokenObjectEnd);
     }
-    bool finalizeTokenOk = true;
-    while (comma.type_ == tokenComment && finalizeTokenOk)
-      finalizeTokenOk = readToken(comma);
     if (comma.type_ == tokenObjectEnd)
       return true;
   }
@@ -1533,10 +1512,7 @@
 
     Token currentToken;
     // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                          currentToken.type_ != tokenArrayEnd);
     if (!ok || badTokenType) {

diff --git a/test/data/fail_strict_comment_01.json b/test/data/fail_strict_comment_01.json
new file mode 100644
index 0000000..b7e0a5e
--- /dev/null
+++ b/test/data/fail_strict_comment_01.json

@@ -0,0 +1,4 @@
+{
+  "a": "aaa",
+  "b": "bbb" // comments not allowed in strict mode
+}

diff --git a/test/data/fail_strict_comment_02.json b/test/data/fail_strict_comment_02.json
new file mode 100644
index 0000000..699a7f7
--- /dev/null
+++ b/test/data/fail_strict_comment_02.json

@@ -0,0 +1,4 @@
+{
+  "a": "aaa", // comments not allowed in strict mode
+  "b": "bbb"
+}

diff --git a/test/data/fail_strict_comment_03.json b/test/data/fail_strict_comment_03.json
new file mode 100644
index 0000000..5f0fabf
--- /dev/null
+++ b/test/data/fail_strict_comment_03.json

@@ -0,0 +1,3 @@
+{
+  "array" : [1, 2, 3 /* comments not allowed in strict mode */]
+}

diff --git a/test/data/fail_test_object_02.json b/test/data/fail_test_object_02.json
new file mode 100644
index 0000000..afe62c5
--- /dev/null
+++ b/test/data/fail_test_object_02.json

@@ -0,0 +1 @@
+{"one": 1 /* } */ { "two" : 2 }

diff --git a/test/runjsontests.py b/test/runjsontests.py
index 5496e2c..49cc7a9 100644
--- a/test/runjsontests.py
+++ b/test/runjsontests.py

@@ -97,14 +97,17 @@
     valgrind_path = use_valgrind and VALGRIND_CMD or ''
     for input_path in tests + test_jsonchecker:
         expect_failure = os.path.basename(input_path).startswith('fail')
-        is_json_checker_test = (input_path in test_jsonchecker) or expect_failure
+        is_json_checker_test = input_path in test_jsonchecker
+        is_parse_only = is_json_checker_test or expect_failure
+        is_strict_test = ('_strict_' in os.path.basename(input_path)) or is_json_checker_test
         print('TESTING:', input_path, end=' ')
-        options = is_json_checker_test and '--json-checker' or ''
+        options = is_parse_only and '--parse-only' or ''
+        options += is_strict_test and ' --strict' or ''
         options += ' --json-writer %s'%writerClass
         cmd = '%s%s %s "%s"' % (            valgrind_path, jsontest_executable_path, options,
             input_path)
         status, process_output = getStatusOutput(cmd)
-        if is_json_checker_test:
+        if is_parse_only:
             if expect_failure:
                 if not status:
                     print('FAILED')
commit	0a9b9d9c6ea04847af83c32930176ec42ba6c842	[log] [tgz]
author	vslashg <veloso@verylowsodium.com>	Mon Sep 09 20:30:16 2024 -0400
committer	GitHub <noreply@github.com>	Mon Sep 09 17:30:16 2024 -0700
tree	269d64ab6de008c6ad971563dacc94149a047333
parent	c3a986600f9975a33e3573d85b48db63011d3711 [diff]