No public description
PiperOrigin-RevId: 555801756
diff --git a/build_defs/cc_fuzztest_grammar_library.bzl b/build_defs/cc_fuzztest_grammar_library.bzl
index a63a939..367a6a3 100644
--- a/build_defs/cc_fuzztest_grammar_library.bzl
+++ b/build_defs/cc_fuzztest_grammar_library.bzl
@@ -15,7 +15,7 @@
"""Build rules to create cc_library that implements the InGrammar domain for a
given grammar from an ANTLRv4 grammar specification."""
-def cc_fuzztest_grammar_library(name, srcs):
+def cc_fuzztest_grammar_library(name, srcs, grammar_name = None):
"""Generates the C++ library corresponding to an antlr4 grammar specification.
Args:
@@ -24,14 +24,15 @@
"""
output_file_name = name + ".h"
+ cmd = "$(location //tools:grammar_domain_code_generator) " + "--output_header_file_path " + "$(@D)/" + output_file_name + " --input_grammar_files " + "`echo $(SRCS) | tr ' ' ','`"
+ if grammar_name:
+ cmd += " --grammar_name " + grammar_name
native.genrule(
name = name + "_source",
srcs = srcs,
outs = [output_file_name],
- cmd = "$(location //tools:grammar_domain_code_generator) " +
- "--output_header_file_path " +
- "$(@D)/" + output_file_name + " --input_grammar_files " + "$(SRCS)",
+ cmd = cmd,
heuristic_label_expansion = False,
tools = ["//tools:grammar_domain_code_generator"],
)
diff --git a/fuzztest/grammars/BUILD b/fuzztest/grammars/BUILD
index 96b2c30..f362fec 100644
--- a/fuzztest/grammars/BUILD
+++ b/fuzztest/grammars/BUILD
@@ -28,4 +28,5 @@
cc_fuzztest_grammar_library(
name = "json_grammar",
srcs = ["JSON.g4"],
+ grammar_name = "json",
)
diff --git a/fuzztest/grammars/CMakeLists.txt b/fuzztest/grammars/CMakeLists.txt
index 069a401..b225485 100644
--- a/fuzztest/grammars/CMakeLists.txt
+++ b/fuzztest/grammars/CMakeLists.txt
@@ -1,4 +1,4 @@
-function(fuzztest_grammar_library lib_name)
+function(fuzztest_grammar_library lib_name grammar_name)
if (ARGN)
set(${lib_name}_grammar_files "")
set(${lib_name}_output_file ${lib_name}.h)
@@ -11,6 +11,7 @@
OUTPUT ${${lib_name}_output_file}
COMMAND
$<TARGET_FILE:grammar_domain_code_generator>
+ --grammar_name ${grammar_name}
--input_grammar_files ${all_grammar_files}
--output_header_file_path
${CMAKE_CURRENT_BINARY_DIR}/${${lib_name}_output_file}
@@ -38,4 +39,4 @@
endif()
endfunction()
-fuzztest_grammar_library("json_grammar" ./JSON.g4)
+fuzztest_grammar_library("json_grammar" "json" ./JSON.g4)
diff --git a/fuzztest/grammars/JSON.g4 b/fuzztest/grammars/JSON.g4
index cacc2cf..818b1ca 100644
--- a/fuzztest/grammars/JSON.g4
+++ b/fuzztest/grammars/JSON.g4
@@ -18,7 +18,7 @@
// - Restricted character set (e.g., no unicode chars).
// - No escape sequences (e.g., \n, \t, \uff01, etc.)
-grammar JSON;
+grammar JSON_GRAMMAR;
json : element ;
diff --git a/grammar_codegen/antlr_frontend.cc b/grammar_codegen/antlr_frontend.cc
index 322fac0..5679445 100644
--- a/grammar_codegen/antlr_frontend.cc
+++ b/grammar_codegen/antlr_frontend.cc
@@ -22,6 +22,7 @@
#include <vector>
#include "absl/strings/ascii.h"
+#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "./grammar_codegen/generated_antlr_parser/ANTLRv4Lexer.h"
#include "./grammar_codegen/grammar_info.h"
@@ -165,10 +166,11 @@
Range GrammarInfoBuilder::ParseRange(std::string_view s) {
return (s == "?") ? Range::kOptional
: (s == "+") ? Range::kNonEmpty
- : (s == "*") ? Range::kUnlimited
- : (FUZZTEST_INTERNAL_CHECK(
- false, absl::StrCat("Unhandled case: ", s)),
- Range::kNoRange);
+ : (s == "+?") ? Range::kNonEmpty  // Non-greedy, but still one or more.
+ : (s == "*" || s == "*?") ? Range::kUnlimited  // Greediness is moot here.
+ : (FUZZTEST_INTERNAL_CHECK(false,
+ absl::StrCat("Unhandled case: ", s)),
+ Range::kNoRange);
}
Block GrammarInfoBuilder::ConstructBlock(
@@ -258,7 +260,8 @@
}
Grammar GrammarInfoBuilder::BuildGrammarInfo(
- const std::vector<std::string>& input_grammar_specs) {
+ const std::vector<std::string>& input_grammar_specs,
+ std::optional<std::string> grammar_name) {
FUZZTEST_INTERNAL_CHECK_PRECONDITION(!input_grammar_specs.empty(),
"No input files!");
for (auto& input_grammar_spec : input_grammar_specs) {
@@ -278,6 +281,9 @@
FUZZTEST_INTERNAL_CHECK(false, "Unknown errors!");
}
}
+ if (grammar_name.has_value()) {
+ grammar_name_ = *grammar_name;
+ }
FUZZTEST_INTERNAL_CHECK(!grammar_name_.empty() && !rules_.empty(),
"Wrong grammar file!");
return Grammar{std::move(grammar_name_), std::move(rules_)};
diff --git a/grammar_codegen/antlr_frontend.h b/grammar_codegen/antlr_frontend.h
index 17679b8..414e7f2 100644
--- a/grammar_codegen/antlr_frontend.h
+++ b/grammar_codegen/antlr_frontend.h
@@ -34,7 +34,9 @@
// information for every symbol and constructs the IR.
class GrammarInfoBuilder : public antlr4_grammar::ANTLRv4ParserBaseListener {
public:
- Grammar BuildGrammarInfo(const std::vector<std::string>& input_grammar_specs);
+ Grammar BuildGrammarInfo(
+ const std::vector<std::string>& input_grammar_specs,
+ std::optional<std::string> grammar_name = std::nullopt);
// Every symbol in the grammar has a handler function in the listener, which
// will be called when such a symbol is visited during tree traversal. The
diff --git a/grammar_codegen/backend.cc b/grammar_codegen/backend.cc
index 657b52e..6f80e45 100644
--- a/grammar_codegen/backend.cc
+++ b/grammar_codegen/backend.cc
@@ -149,6 +149,33 @@
return absl::StrFormat("Optional<k%s, %s>", parent_type, child_type);
}
}
+
+bool HasEOF(const ProductionWithFallbackIndex& productions) {  // True if any block, at any nesting depth, is the non-terminal "EOF".
+ for (const ProductionRule& production : productions.production_rules) {
+ for (const Block& block : production.blocks) {
+ const auto& element = block.element;
+ if (element.index() == BlockType::kNonTerminal) {
+ if (std::get<NonTerminal>(element).name == "EOF") {
+ return true;
+ }
+ } else if (element.index() == BlockType::kSubProductions) {
+ if (HasEOF(std::get<ProductionWithFallbackIndex>(element))) {  // Recurse into the nested sub-productions.
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+bool HasEOF(const Grammar& grammar) {  // True if the productions of any rule in the grammar contain the non-terminal "EOF".
+ for (const GrammarRule& rule : grammar.rules) {
+ if (HasEOF(rule.productions)) {  // Delegates to the per-production overload.
+ return true;
+ }
+ }
+ return false;
+}
} // namespace
void CodeGenerator::Preprocess(Grammar& grammar) {
@@ -161,6 +188,13 @@
}
grammar.rules.insert(grammar.rules.end(), new_grammar_rules.begin(),
new_grammar_rules.end());
+ if (HasEOF(grammar)) {
+ ProductionRule prod_rule = {{Block{
+ Range::kNoRange, Terminal{TerminalType::kStringLiteral, "\"\""}}}};
+ GrammarRule eof_rule =
+ GrammarRule{"EOF", ProductionWithFallbackIndex{0, {prod_rule}}};
+ grammar.rules.push_back(eof_rule);
+ }
}
std::string CodeGenerator::Generate() {
@@ -388,7 +422,9 @@
} while (has_change);
for (size_t i = 0; i < safe_rule_indexes.size(); ++i) {
- FUZZTEST_INTERNAL_CHECK(safe_rule_indexes[i], "Some node is not safe!");
+ FUZZTEST_INTERNAL_CHECK(
+ safe_rule_indexes[i],
+ absl::StrCat("Some node is not safe: ", rules[i].symbol_name));
}
// Ensure that every sub-block is marked safe. For example, a grammar rule
diff --git a/grammar_codegen/code_generation.cc b/grammar_codegen/code_generation.cc
index 35b2180..af0fe2e 100644
--- a/grammar_codegen/code_generation.cc
+++ b/grammar_codegen/code_generation.cc
@@ -14,12 +14,20 @@
#include "./grammar_codegen/code_generation.h"
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "./grammar_codegen/backend.h"
+
namespace fuzztest::internal::grammar {
std::string GenerateGrammarHeader(
- const std::vector<std::string>& input_grammar_specs) {
+ const std::vector<std::string>& input_grammar_specs,
+ std::optional<std::string> grammar_name) {
GrammarInfoBuilder builder;
- CodeGenerator backend(builder.BuildGrammarInfo(input_grammar_specs));
+ CodeGenerator backend(
+ builder.BuildGrammarInfo(input_grammar_specs, grammar_name));
return backend.Generate();
}
} // namespace fuzztest::internal::grammar
diff --git a/grammar_codegen/code_generation.h b/grammar_codegen/code_generation.h
index 95f5e05..30200f9 100644
--- a/grammar_codegen/code_generation.h
+++ b/grammar_codegen/code_generation.h
@@ -15,11 +15,12 @@
#ifndef FUZZTEST_GRAMMAR_CODEGEN_CODE_GENERATION_H_
#define FUZZTEST_GRAMMAR_CODEGEN_CODE_GENERATION_H_
+#include <optional>
#include <string>
#include <vector>
-#include "./grammar_codegen/backend.h"
#include "./grammar_codegen/antlr_frontend.h"
+#include "./grammar_codegen/backend.h"
namespace fuzztest::internal::grammar {
@@ -27,7 +28,8 @@
// process of building grammar information from grammar files and generating
// code from grammar information.
std::string GenerateGrammarHeader(
- const std::vector<std::string>& input_grammar_specs);
+ const std::vector<std::string>& input_grammar_specs,
+ std::optional<std::string> grammar_name = std::nullopt);
} // namespace fuzztest::internal::grammar
diff --git a/grammar_codegen/code_generation_test.cc b/grammar_codegen/code_generation_test.cc
index dad4d72..57431d5 100644
--- a/grammar_codegen/code_generation_test.cc
+++ b/grammar_codegen/code_generation_test.cc
@@ -67,7 +67,7 @@
const std::vector<std::string> input_files{
GetContents(absl::StrCat(src_dir, "fuzztest/grammars/JSON.g4"))};
const std::string generated_header =
- fuzztest::internal::grammar::GenerateGrammarHeader(input_files);
+ fuzztest::internal::grammar::GenerateGrammarHeader(input_files, "json");
const std::string ground_true_header = GetContents(absl::StrCat(
src_dir, "grammar_codegen/testdata/expected_json_grammar.h"));
diff --git a/tools/grammar_domain_code_generator.cc b/tools/grammar_domain_code_generator.cc
index d99bbf7..ad0c8a9 100644
--- a/tools/grammar_domain_code_generator.cc
+++ b/tools/grammar_domain_code_generator.cc
@@ -21,6 +21,7 @@
// --output_header_file=json_grammar.h
#include <filesystem>
+#include <optional>
#include <string>
#include <vector>
@@ -36,6 +37,12 @@
std::vector<std::string>, input_grammar_files, std::vector<std::string>(),
"Required. The nonempty list of the input grammar specification files.");
+ABSL_FLAG(
+ std::string, grammar_name, "",
+ "Optional. The name of the top level grammar rule. The domain "
+ "generates strings of the grammar rule under this name. It is also used "
+ "in the domain name.");
+
namespace {
std::string GetContents(const std::string& path) {
@@ -63,6 +70,11 @@
!input_files.empty(),
"You must provide the list of input files, separated by ','");
+ std::optional<std::string> grammar_name = std::nullopt;
+ if (!absl::GetFlag(FLAGS_grammar_name).empty()) {
+ grammar_name = absl::GetFlag(FLAGS_grammar_name);
+ }
+
std::vector<std::string> input_grammar_specs;
for (const std::string& input_file : input_files) {
FUZZTEST_INTERNAL_CHECK_PRECONDITION(std::filesystem::exists(input_file),
@@ -70,7 +82,7 @@
input_grammar_specs.push_back(GetContents(input_file));
}
output_file << fuzztest::internal::grammar::GenerateGrammarHeader(
- input_grammar_specs);
+ input_grammar_specs, grammar_name);
output_file.close();
return 0;
}