No public description PiperOrigin-RevId: 555801756
diff --git a/build_defs/cc_fuzztest_grammar_library.bzl b/build_defs/cc_fuzztest_grammar_library.bzl index a63a939..367a6a3 100644 --- a/build_defs/cc_fuzztest_grammar_library.bzl +++ b/build_defs/cc_fuzztest_grammar_library.bzl
@@ -15,7 +15,7 @@ """Build rules to create cc_library that implements the InGrammar domain for a given grammar from an ANTLRv4 grammar specification.""" -def cc_fuzztest_grammar_library(name, srcs): +def cc_fuzztest_grammar_library(name, srcs, grammar_name = None): """Generates the C++ library corresponding to an antlr4 grammar specification. Args: @@ -24,14 +24,15 @@ """ output_file_name = name + ".h" + cmd = "$(location //tools:grammar_domain_code_generator) " + "--output_header_file_path " + "$(@D)/" + output_file_name + " --input_grammar_files " + "`echo $(SRCS) | tr ' ' ','`" + if grammar_name: + cmd += " --grammar_name " + grammar_name native.genrule( name = name + "_source", srcs = srcs, outs = [output_file_name], - cmd = "$(location //tools:grammar_domain_code_generator) " + - "--output_header_file_path " + - "$(@D)/" + output_file_name + " --input_grammar_files " + "$(SRCS)", + cmd = cmd, heuristic_label_expansion = False, tools = ["//tools:grammar_domain_code_generator"], )
diff --git a/fuzztest/grammars/BUILD b/fuzztest/grammars/BUILD index 96b2c30..f362fec 100644 --- a/fuzztest/grammars/BUILD +++ b/fuzztest/grammars/BUILD
@@ -28,4 +28,5 @@ cc_fuzztest_grammar_library( name = "json_grammar", srcs = ["JSON.g4"], + grammar_name = "json", )
diff --git a/fuzztest/grammars/CMakeLists.txt b/fuzztest/grammars/CMakeLists.txt index 069a401..b225485 100644 --- a/fuzztest/grammars/CMakeLists.txt +++ b/fuzztest/grammars/CMakeLists.txt
@@ -1,4 +1,4 @@ -function(fuzztest_grammar_library lib_name) +function(fuzztest_grammar_library lib_name grammar_name) if (ARGN) set(${lib_name}_grammar_files "") set(${lib_name}_output_file ${lib_name}.h) @@ -11,6 +11,7 @@ OUTPUT ${${lib_name}_output_file} COMMAND $<TARGET_FILE:grammar_domain_code_generator> + --grammar_name ${grammar_name} --input_grammar_files ${all_grammar_files} --output_header_file_path ${CMAKE_CURRENT_BINARY_DIR}/${${lib_name}_output_file} @@ -38,4 +39,4 @@ endif() endfunction() -fuzztest_grammar_library("json_grammar" ./JSON.g4) +fuzztest_grammar_library("json_grammar" "json" ./JSON.g4)
diff --git a/fuzztest/grammars/JSON.g4 b/fuzztest/grammars/JSON.g4 index cacc2cf..818b1ca 100644 --- a/fuzztest/grammars/JSON.g4 +++ b/fuzztest/grammars/JSON.g4
@@ -18,7 +18,7 @@ // - Restricted character set (e.g., no unicode chars). // - No escape sequences (e.g., \n, \t, \uff01, etc.) -grammar JSON; +grammar JSON_GRAMMAR; json : element ;
diff --git a/grammar_codegen/antlr_frontend.cc b/grammar_codegen/antlr_frontend.cc index 322fac0..5679445 100644 --- a/grammar_codegen/antlr_frontend.cc +++ b/grammar_codegen/antlr_frontend.cc
@@ -22,6 +22,7 @@ #include <vector> #include "absl/strings/ascii.h" +#include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "./grammar_codegen/generated_antlr_parser/ANTLRv4Lexer.h" #include "./grammar_codegen/grammar_info.h" @@ -165,10 +166,11 @@ Range GrammarInfoBuilder::ParseRange(std::string_view s) { return (s == "?") ? Range::kOptional : (s == "+") ? Range::kNonEmpty - : (s == "*") ? Range::kUnlimited - : (FUZZTEST_INTERNAL_CHECK( - false, absl::StrCat("Unhandled case: ", s)), - Range::kNoRange); + : (s == "+?") ? Range::kNonEmpty + : (s == "*" || s == "*?") ? Range::kUnlimited + : (FUZZTEST_INTERNAL_CHECK(false, + absl::StrCat("Unhandled case: ", s)), + Range::kNoRange); } Block GrammarInfoBuilder::ConstructBlock( @@ -258,7 +260,8 @@ } Grammar GrammarInfoBuilder::BuildGrammarInfo( - const std::vector<std::string>& input_grammar_specs) { + const std::vector<std::string>& input_grammar_specs, + std::optional<std::string> grammar_name) { FUZZTEST_INTERNAL_CHECK_PRECONDITION(!input_grammar_specs.empty(), "No input files!"); for (auto& input_grammar_spec : input_grammar_specs) { @@ -278,6 +281,9 @@ FUZZTEST_INTERNAL_CHECK(false, "Unknown errors!"); } } + if (grammar_name.has_value()) { + grammar_name_ = *std::move(grammar_name); + } FUZZTEST_INTERNAL_CHECK(!grammar_name_.empty() && !rules_.empty(), "Wrong grammar file!"); return Grammar{std::move(grammar_name_), std::move(rules_)};
diff --git a/grammar_codegen/antlr_frontend.h b/grammar_codegen/antlr_frontend.h index 17679b8..414e7f2 100644 --- a/grammar_codegen/antlr_frontend.h +++ b/grammar_codegen/antlr_frontend.h
@@ -34,7 +34,9 @@ // information for every symbol and constructs the IR. class GrammarInfoBuilder : public antlr4_grammar::ANTLRv4ParserBaseListener { public: - Grammar BuildGrammarInfo(const std::vector<std::string>& input_grammar_specs); + Grammar BuildGrammarInfo( + const std::vector<std::string>& input_grammar_specs, + std::optional<std::string> grammar_name = std::nullopt); // Every symbol in the grammar has a handler function in the listener, which // will be called when such a symbol is visited during tree traversal. The
diff --git a/grammar_codegen/backend.cc b/grammar_codegen/backend.cc index 657b52e..6f80e45 100644 --- a/grammar_codegen/backend.cc +++ b/grammar_codegen/backend.cc
@@ -149,6 +149,33 @@ return absl::StrFormat("Optional<k%s, %s>", parent_type, child_type); } } + +bool HasEOF(const ProductionWithFallbackIndex& productions) { + for (const ProductionRule& production : productions.production_rules) { + for (const Block& block : production.blocks) { + const auto& element = block.element; + if (element.index() == BlockType::kNonTerminal) { + if (std::get<NonTerminal>(element).name == "EOF") { + return true; + } + } else if (element.index() == BlockType::kSubProductions) { + if (HasEOF(std::get<ProductionWithFallbackIndex>(element))) { + return true; + } + } + } + } + return false; +} + +bool HasEOF(const Grammar& grammar) { + for (const GrammarRule& rule : grammar.rules) { + if (HasEOF(rule.productions)) { + return true; + } + } + return false; +} } // namespace void CodeGenerator::Preprocess(Grammar& grammar) { @@ -161,6 +188,13 @@ } grammar.rules.insert(grammar.rules.end(), new_grammar_rules.begin(), new_grammar_rules.end()); + if (HasEOF(grammar)) { + ProductionRule prod_rule = {{Block{ + Range::kNoRange, Terminal{TerminalType::kStringLiteral, "\"\""}}}}; + GrammarRule eof_rule = + GrammarRule{"EOF", ProductionWithFallbackIndex{0, {prod_rule}}}; + grammar.rules.push_back(eof_rule); + } } std::string CodeGenerator::Generate() { @@ -388,7 +422,9 @@ } while (has_change); for (size_t i = 0; i < safe_rule_indexes.size(); ++i) { - FUZZTEST_INTERNAL_CHECK(safe_rule_indexes[i], "Some node is not safe!"); + FUZZTEST_INTERNAL_CHECK( + safe_rule_indexes[i], + absl::StrCat("Some node is not safe: ", rules[i].symbol_name)); } // Ensure that every sub-block is marked safe. For example, a grammar rule
diff --git a/grammar_codegen/code_generation.cc b/grammar_codegen/code_generation.cc index 35b2180..af0fe2e 100644 --- a/grammar_codegen/code_generation.cc +++ b/grammar_codegen/code_generation.cc
@@ -14,12 +14,21 @@ #include "./grammar_codegen/code_generation.h" +#include <optional> +#include <string> +#include <utility> +#include <vector> + +#include "./grammar_codegen/backend.h" + namespace fuzztest::internal::grammar { std::string GenerateGrammarHeader( - const std::vector<std::string>& input_grammar_specs) { + const std::vector<std::string>& input_grammar_specs, + std::optional<std::string> grammar_name) { GrammarInfoBuilder builder; - CodeGenerator backend(builder.BuildGrammarInfo(input_grammar_specs)); + CodeGenerator backend( + builder.BuildGrammarInfo(input_grammar_specs, std::move(grammar_name))); return backend.Generate(); } } // namespace fuzztest::internal::grammar
diff --git a/grammar_codegen/code_generation.h b/grammar_codegen/code_generation.h index 95f5e05..30200f9 100644 --- a/grammar_codegen/code_generation.h +++ b/grammar_codegen/code_generation.h
@@ -15,11 +15,12 @@ #ifndef FUZZTEST_GRAMMAR_CODEGEN_CODE_GENERATION_H_ #define FUZZTEST_GRAMMAR_CODEGEN_CODE_GENERATION_H_ +#include <optional> #include <string> #include <vector> -#include "./grammar_codegen/backend.h" #include "./grammar_codegen/antlr_frontend.h" +#include "./grammar_codegen/backend.h" namespace fuzztest::internal::grammar { @@ -27,7 +28,8 @@ // process of building grammar information from grammar files and generating // code from grammar information. std::string GenerateGrammarHeader( - const std::vector<std::string>& input_grammar_specs); + const std::vector<std::string>& input_grammar_specs, + std::optional<std::string> grammar_name = std::nullopt); } // namespace fuzztest::internal::grammar
diff --git a/grammar_codegen/code_generation_test.cc b/grammar_codegen/code_generation_test.cc index dad4d72..57431d5 100644 --- a/grammar_codegen/code_generation_test.cc +++ b/grammar_codegen/code_generation_test.cc
@@ -67,7 +67,7 @@ const std::vector<std::string> input_files{ GetContents(absl::StrCat(src_dir, "fuzztest/grammars/JSON.g4"))}; const std::string generated_header = - fuzztest::internal::grammar::GenerateGrammarHeader(input_files); + fuzztest::internal::grammar::GenerateGrammarHeader(input_files, "json"); const std::string ground_true_header = GetContents(absl::StrCat( src_dir, "grammar_codegen/testdata/expected_json_grammar.h"));
diff --git a/tools/grammar_domain_code_generator.cc b/tools/grammar_domain_code_generator.cc index d99bbf7..ad0c8a9 100644 --- a/tools/grammar_domain_code_generator.cc +++ b/tools/grammar_domain_code_generator.cc
@@ -21,6 +21,7 @@ // --output_header_file=json_grammar.h #include <filesystem> +#include <optional> #include <string> #include <vector> @@ -36,6 +37,12 @@ std::vector<std::string>, input_grammar_files, std::vector<std::string>(), "Required. The nonempty list of the input grammar specification files."); +ABSL_FLAG( + std::string, grammar_name, "", + "Optional. The name of the top level grammar rule. The domain " + "generates strings of the grammar rule under this name. It is also used " + "in the domain name."); + namespace { std::string GetContents(const std::string& path) { @@ -63,6 +70,11 @@ !input_files.empty(), "You must provide the list of input files, separated by ','"); + std::optional<std::string> grammar_name = std::nullopt; + if (!absl::GetFlag(FLAGS_grammar_name).empty()) { + grammar_name = absl::GetFlag(FLAGS_grammar_name); + } + std::vector<std::string> input_grammar_specs; for (const std::string& input_file : input_files) { FUZZTEST_INTERNAL_CHECK_PRECONDITION(std::filesystem::exists(input_file), @@ -70,7 +82,7 @@ input_grammar_specs.push_back(GetContents(input_file)); } output_file << fuzztest::internal::grammar::GenerateGrammarHeader( - input_grammar_specs); + input_grammar_specs, grammar_name); output_file.close(); return 0; }