No public description

PiperOrigin-RevId: 555801756
diff --git a/build_defs/cc_fuzztest_grammar_library.bzl b/build_defs/cc_fuzztest_grammar_library.bzl
index a63a939..367a6a3 100644
--- a/build_defs/cc_fuzztest_grammar_library.bzl
+++ b/build_defs/cc_fuzztest_grammar_library.bzl
@@ -15,7 +15,7 @@
 """Build rules to create cc_library that implements the InGrammar domain for a
 given grammar from an ANTLRv4 grammar specification."""
 
-def cc_fuzztest_grammar_library(name, srcs):
+def cc_fuzztest_grammar_library(name, srcs, grammar_name = None):
     """Generates the C++ library corresponding to an antlr4 grammar specification.
 
     Args:
@@ -24,14 +24,15 @@
     """
 
     output_file_name = name + ".h"
+    cmd = "$(location //tools:grammar_domain_code_generator) " + "--output_header_file_path " + "$(@D)/" + output_file_name + " --input_grammar_files " + "`echo $(SRCS) | tr ' ' ','`"
+    if grammar_name:
+        cmd += " --grammar_name " + grammar_name
 
     native.genrule(
         name = name + "_source",
         srcs = srcs,
         outs = [output_file_name],
-        cmd = "$(location //tools:grammar_domain_code_generator) " +
-              "--output_header_file_path " +
-              "$(@D)/" + output_file_name + " --input_grammar_files " + "$(SRCS)",
+        cmd = cmd,
         heuristic_label_expansion = False,
         tools = ["//tools:grammar_domain_code_generator"],
     )
diff --git a/fuzztest/grammars/BUILD b/fuzztest/grammars/BUILD
index 96b2c30..f362fec 100644
--- a/fuzztest/grammars/BUILD
+++ b/fuzztest/grammars/BUILD
@@ -28,4 +28,5 @@
 cc_fuzztest_grammar_library(
     name = "json_grammar",
     srcs = ["JSON.g4"],
+    grammar_name = "json",
 )
diff --git a/fuzztest/grammars/CMakeLists.txt b/fuzztest/grammars/CMakeLists.txt
index 069a401..b225485 100644
--- a/fuzztest/grammars/CMakeLists.txt
+++ b/fuzztest/grammars/CMakeLists.txt
@@ -1,4 +1,4 @@
-function(fuzztest_grammar_library lib_name)
+function(fuzztest_grammar_library lib_name grammar_name)
   if (ARGN)
     set(${lib_name}_grammar_files "")
     set(${lib_name}_output_file ${lib_name}.h)
@@ -11,6 +11,7 @@
       OUTPUT ${${lib_name}_output_file}
       COMMAND
         $<TARGET_FILE:grammar_domain_code_generator>
+        --grammar_name ${grammar_name}
         --input_grammar_files ${all_grammar_files}
         --output_header_file_path
           ${CMAKE_CURRENT_BINARY_DIR}/${${lib_name}_output_file}
@@ -38,4 +39,4 @@
   endif()
 endfunction()
 
-fuzztest_grammar_library("json_grammar" ./JSON.g4)
+fuzztest_grammar_library("json_grammar" "json" ./JSON.g4)
diff --git a/fuzztest/grammars/JSON.g4 b/fuzztest/grammars/JSON.g4
index cacc2cf..818b1ca 100644
--- a/fuzztest/grammars/JSON.g4
+++ b/fuzztest/grammars/JSON.g4
@@ -18,7 +18,7 @@
 // - Restricted character set (e.g., no unicode chars).
 // - No escape sequences (e.g., \n, \t, \uff01, etc.)
 
-grammar JSON;
+grammar JSON_GRAMMAR;
 
 json : element ;
 
diff --git a/grammar_codegen/antlr_frontend.cc b/grammar_codegen/antlr_frontend.cc
index 322fac0..5679445 100644
--- a/grammar_codegen/antlr_frontend.cc
+++ b/grammar_codegen/antlr_frontend.cc
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "absl/strings/ascii.h"
+#include "absl/strings/str_cat.h"
 #include "absl/strings/str_format.h"
 #include "./grammar_codegen/generated_antlr_parser/ANTLRv4Lexer.h"
 #include "./grammar_codegen/grammar_info.h"
@@ -165,10 +166,11 @@
 Range GrammarInfoBuilder::ParseRange(std::string_view s) {
   return (s == "?")   ? Range::kOptional
          : (s == "+") ? Range::kNonEmpty
-         : (s == "*") ? Range::kUnlimited
-                      : (FUZZTEST_INTERNAL_CHECK(
-                             false, absl::StrCat("Unhandled case: ", s)),
-                         Range::kNoRange);
+         : (s == "??") ? Range::kOptional  // Non-greedy forms generate the
+         : (s == "+?") ? Range::kNonEmpty  // same strings as greedy ones.
+         : (s == "*" || s == "*?") ? Range::kUnlimited
+         : (FUZZTEST_INTERNAL_CHECK(false, absl::StrCat("Unhandled case: ", s)),
+            Range::kNoRange);
 }
 
 Block GrammarInfoBuilder::ConstructBlock(
@@ -258,7 +260,8 @@
 }
 
 Grammar GrammarInfoBuilder::BuildGrammarInfo(
-    const std::vector<std::string>& input_grammar_specs) {
+    const std::vector<std::string>& input_grammar_specs,
+    std::optional<std::string> grammar_name) {
   FUZZTEST_INTERNAL_CHECK_PRECONDITION(!input_grammar_specs.empty(),
                                        "No input files!");
   for (auto& input_grammar_spec : input_grammar_specs) {
@@ -278,6 +281,9 @@
       FUZZTEST_INTERNAL_CHECK(false, "Unknown errors!");
     }
   }
+  if (grammar_name.has_value()) {
+    grammar_name_ = *grammar_name;
+  }
   FUZZTEST_INTERNAL_CHECK(!grammar_name_.empty() && !rules_.empty(),
                           "Wrong grammar file!");
   return Grammar{std::move(grammar_name_), std::move(rules_)};
diff --git a/grammar_codegen/antlr_frontend.h b/grammar_codegen/antlr_frontend.h
index 17679b8..414e7f2 100644
--- a/grammar_codegen/antlr_frontend.h
+++ b/grammar_codegen/antlr_frontend.h
@@ -34,7 +34,9 @@
 // information for every symbol and constructs the IR.
 class GrammarInfoBuilder : public antlr4_grammar::ANTLRv4ParserBaseListener {
  public:
-  Grammar BuildGrammarInfo(const std::vector<std::string>& input_grammar_specs);
+  Grammar BuildGrammarInfo(
+      const std::vector<std::string>& input_grammar_specs,
+      std::optional<std::string> grammar_name = std::nullopt);
 
   // Every symbol in the grammar has a handler function in the listener, which
   // will be called when such a symbol is visited during tree traversal. The
diff --git a/grammar_codegen/backend.cc b/grammar_codegen/backend.cc
index 657b52e..6f80e45 100644
--- a/grammar_codegen/backend.cc
+++ b/grammar_codegen/backend.cc
@@ -149,6 +149,33 @@
       return absl::StrFormat("Optional<k%s, %s>", parent_type, child_type);
   }
 }
+
+// Reports whether any block in `productions`, including blocks nested inside
+// sub-productions, is a non-terminal named "EOF".
+bool HasEOF(const ProductionWithFallbackIndex& productions) {
+  for (const ProductionRule& rule : productions.production_rules) {
+    for (const Block& blk : rule.blocks) {
+      const auto& elem = blk.element;
+      if (elem.index() == BlockType::kNonTerminal &&
+          std::get<NonTerminal>(elem).name == "EOF") {
+        return true;
+      }
+      if (elem.index() == BlockType::kSubProductions &&
+          HasEOF(std::get<ProductionWithFallbackIndex>(elem))) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Reports whether any rule in `grammar` refers to the "EOF" non-terminal.
+bool HasEOF(const Grammar& grammar) {
+  for (const GrammarRule& grammar_rule : grammar.rules) {
+    if (HasEOF(grammar_rule.productions)) return true;
+  }
+  return false;
+}
 }  // namespace
 
 void CodeGenerator::Preprocess(Grammar& grammar) {
@@ -161,6 +188,13 @@
   }
   grammar.rules.insert(grammar.rules.end(), new_grammar_rules.begin(),
                        new_grammar_rules.end());
+  if (HasEOF(grammar)) {
+    ProductionRule prod_rule = {{Block{
+        Range::kNoRange, Terminal{TerminalType::kStringLiteral, "\"\""}}}};
+    GrammarRule eof_rule =
+        GrammarRule{"EOF", ProductionWithFallbackIndex{0, {prod_rule}}};
+    grammar.rules.push_back(eof_rule);
+  }
 }
 
 std::string CodeGenerator::Generate() {
@@ -388,7 +422,9 @@
   } while (has_change);
 
   for (size_t i = 0; i < safe_rule_indexes.size(); ++i) {
-    FUZZTEST_INTERNAL_CHECK(safe_rule_indexes[i], "Some node is not safe!");
+    FUZZTEST_INTERNAL_CHECK(
+        safe_rule_indexes[i],
+        absl::StrCat("Some node is not safe: ", rules[i].symbol_name));
   }
 
   // Ensure that every sub-block is marked safe. For example, a grammar rule
diff --git a/grammar_codegen/code_generation.cc b/grammar_codegen/code_generation.cc
index 35b2180..af0fe2e 100644
--- a/grammar_codegen/code_generation.cc
+++ b/grammar_codegen/code_generation.cc
@@ -14,12 +14,20 @@
 
 #include "./grammar_codegen/code_generation.h"
 
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "./grammar_codegen/backend.h"
+
 namespace fuzztest::internal::grammar {
 
 std::string GenerateGrammarHeader(
-    const std::vector<std::string>& input_grammar_specs) {
+    const std::vector<std::string>& input_grammar_specs,
+    std::optional<std::string> grammar_name) {
   GrammarInfoBuilder builder;
-  CodeGenerator backend(builder.BuildGrammarInfo(input_grammar_specs));
+  CodeGenerator backend(
+      builder.BuildGrammarInfo(input_grammar_specs, grammar_name));
   return backend.Generate();
 }
 }  // namespace fuzztest::internal::grammar
diff --git a/grammar_codegen/code_generation.h b/grammar_codegen/code_generation.h
index 95f5e05..30200f9 100644
--- a/grammar_codegen/code_generation.h
+++ b/grammar_codegen/code_generation.h
@@ -15,11 +15,12 @@
 #ifndef FUZZTEST_GRAMMAR_CODEGEN_CODE_GENERATION_H_
 #define FUZZTEST_GRAMMAR_CODEGEN_CODE_GENERATION_H_
 
+#include <optional>
 #include <string>
 #include <vector>
 
-#include "./grammar_codegen/backend.h"
 #include "./grammar_codegen/antlr_frontend.h"
+#include "./grammar_codegen/backend.h"
 
 namespace fuzztest::internal::grammar {
 
@@ -27,7 +28,8 @@
 // process of building grammar information from grammar files and generating
 // code from grammar information.
 std::string GenerateGrammarHeader(
-    const std::vector<std::string>& input_grammar_specs);
+    const std::vector<std::string>& input_grammar_specs,
+    std::optional<std::string> grammar_name = std::nullopt);
 
 }  // namespace fuzztest::internal::grammar
 
diff --git a/grammar_codegen/code_generation_test.cc b/grammar_codegen/code_generation_test.cc
index dad4d72..57431d5 100644
--- a/grammar_codegen/code_generation_test.cc
+++ b/grammar_codegen/code_generation_test.cc
@@ -67,7 +67,7 @@
   const std::vector<std::string> input_files{
       GetContents(absl::StrCat(src_dir, "fuzztest/grammars/JSON.g4"))};
   const std::string generated_header =
-      fuzztest::internal::grammar::GenerateGrammarHeader(input_files);
+      fuzztest::internal::grammar::GenerateGrammarHeader(input_files, "json");
   const std::string ground_true_header = GetContents(absl::StrCat(
       src_dir, "grammar_codegen/testdata/expected_json_grammar.h"));
 
diff --git a/tools/grammar_domain_code_generator.cc b/tools/grammar_domain_code_generator.cc
index d99bbf7..ad0c8a9 100644
--- a/tools/grammar_domain_code_generator.cc
+++ b/tools/grammar_domain_code_generator.cc
@@ -21,6 +21,7 @@
 //   --output_header_file=json_grammar.h
 
 #include <filesystem>
+#include <optional>
 #include <string>
 #include <vector>
 
@@ -36,6 +37,12 @@
     std::vector<std::string>, input_grammar_files, std::vector<std::string>(),
     "Required. The nonempty list of the input grammar specification files.");
 
+ABSL_FLAG(
+    std::string, grammar_name, "",
+    "Optional. The name of the top level grammar rule. The domain "
+    "generates strings of the grammar rule under this name. It is also used "
+    "in the domain name.");
+
 namespace {
 
 std::string GetContents(const std::string& path) {
@@ -63,6 +70,11 @@
       !input_files.empty(),
       "You must provide the list of input files, separated by ','");
 
+  std::optional<std::string> grammar_name = std::nullopt;
+  if (!absl::GetFlag(FLAGS_grammar_name).empty()) {
+    grammar_name = absl::GetFlag(FLAGS_grammar_name);
+  }
+
   std::vector<std::string> input_grammar_specs;
   for (const std::string& input_file : input_files) {
     FUZZTEST_INTERNAL_CHECK_PRECONDITION(std::filesystem::exists(input_file),
@@ -70,7 +82,7 @@
     input_grammar_specs.push_back(GetContents(input_file));
   }
   output_file << fuzztest::internal::grammar::GenerateGrammarHeader(
-      input_grammar_specs);
+      input_grammar_specs, grammar_name);
   output_file.close();
   return 0;
 }