pw_tokenizer/detokenize_fuzzer.cc - pigweed/pigweed - Git at Google

 // Copyright 2020 The Pigweed Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy of
 // the License at
 //
 //     https://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 // License for the specific language governing permissions and limitations under
 // the License.

 // This file implements a basic fuzz test for the Detokenizer.
 // An instance of the Detokenizer is created from a minimal, nearly-empty token
 // database. Fuzz data is fed to the detokenizer in various supported input
 // argument formats at random, which then decodes this data and tries to match
 // it to tokens in the database.

 #include <cstddef>
 #include <cstdint>
 #include <cstring>
 #include <vector>

 #include "pw_fuzzer/fuzzed_data_provider.h"
 #include "pw_preprocessor/util.h"
 #include "pw_tokenizer/detokenize.h"

 namespace pw::tokenizer {
 namespace {

 // Lower and upper bounds passed to ConsumeIntegralInRange() when choosing how
 // many fuzz bytes to request for a single buffer.
 constexpr size_t kFuzzRangeMin{0};
 constexpr size_t kFuzzRangeMax{10'000};

 // Identifies which Detokenize() call form a fuzz round exercises. The value is
 // drawn from the fuzz input through FuzzedDataProvider::ConsumeEnum().
 enum DetokenizeBufferArgumentType : uint8_t {
   kSpan = 0,
   kStringView = 1,
   kPtrAndLength = 2,
   // Alias of the last enumerator; update it when adding a new entry.
   kMaxValue = kPtrAndLength,
 };

 // In order to better fuzz the detokenizer, rather than use an empty token
 // database, we construct a minimal database with 4 entries out of a string
 // literal array that matches the token database format (see token_database.h
 // for detailed info on the database entry format).
 //
 // NOTE(review): the per-section notes below describe only the bytes visible
 // here; the field meanings are assumptions to confirm in token_database.h.
 alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
     // 8 bytes: "TOKENS" plus two zero bytes (presumably magic and version).
     "TOKENS\0\0"
     // Little-endian 4; presumably the entry count (4 records follow below).
     "\x04\x00\x00\x00"
     // Four zero bytes (presumably reserved or padding).
     "\0\0\0\0"
     // Four 8-byte records: 4 bytes (presumably a little-endian token value)
     // followed by the 4-byte filler "----".
     "\x01\x00\x00\x00----"
     "\x05\x00\x00\x00----"
     "\xFF\x00\x00\x00----"
     "\xFF\xEE\xEE\xDD----"
     // Four strings, presumably one per record above. The first three carry an
     // explicit NUL; the last is ended by the literal's implicit terminator.
     "One\0"
     "TWO\0"
     "333\0"
     "FOUR";

 }  // namespace

 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
   static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());

   FuzzedDataProvider provider(data, size);

   while (provider.remaining_bytes() != 0) {
     // Map the first word of the remaining fuzz data to a buffer argument
     // type, and feed the Detokenizer with a random length buffer to be
     // detokenized in the relevant format. The detokenized string returned
     // is itself of little consequence to this test.
     switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) {
       case kSpan: {
         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
             kFuzzRangeMin, kFuzzRangeMax);
         std::vector<uint8_t> buffer =
             provider.ConsumeBytes<uint8_t>(consumed_size);
         auto detokenized_string =
             detokenizer.Detokenize(std::span(&buffer[0], buffer.size()));
         PW_UNUSED(detokenized_string);
         break;
       }

       case kStringView: {
         std::string str =
             provider.ConsumeRandomLengthString(provider.remaining_bytes());
         auto detokenized_string = detokenizer.Detokenize(str);
         PW_UNUSED(detokenized_string);
         break;
       }

       case kPtrAndLength: {
         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
             kFuzzRangeMin, kFuzzRangeMax);
         std::vector<uint8_t> buffer =
             provider.ConsumeBytes<uint8_t>(consumed_size);
         auto detokenized_string =
             detokenizer.Detokenize(&buffer[0], buffer.size());
         PW_UNUSED(detokenized_string);
         break;
       }
     }
   }

   return 0;
 }

 }  // namespace pw::tokenizer
	// Copyright 2020 The Pigweed Authors
	//
	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
	// use this file except in compliance with the License. You may obtain a copy of
	// the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	// License for the specific language governing permissions and limitations under
	// the License.

	// This file implements a basic fuzz test for the Detokenizer.
	// An instance of the Detokenizer is created from a minimal, nearly-empty token
	// database. Fuzz data is fed to the detokenizer in various supported input
	// argument formats at random, which then decodes this data and tries to match
	// it to tokens in the database.

	#include <cstddef>
	#include <cstdint>
	#include <cstring>
	#include <vector>

	#include "pw_fuzzer/fuzzed_data_provider.h"
	#include "pw_preprocessor/util.h"
	#include "pw_tokenizer/detokenize.h"

	namespace pw::tokenizer {
	namespace {

	// Lower and upper bounds passed to ConsumeIntegralInRange() when choosing how
	// many fuzz bytes to request for a single buffer.
	constexpr size_t kFuzzRangeMin{0};
	constexpr size_t kFuzzRangeMax{10'000};

	// Identifies which Detokenize() call form a fuzz round exercises. The value is
	// drawn from the fuzz input through FuzzedDataProvider::ConsumeEnum().
	enum DetokenizeBufferArgumentType : uint8_t {
	  kSpan = 0,
	  kStringView = 1,
	  kPtrAndLength = 2,
	  // Alias of the last enumerator; update it when adding a new entry.
	  kMaxValue = kPtrAndLength,
	};

	// In order to better fuzz the detokenizer, rather than use an empty token
	// database, we construct a minimal database with 4 entries out of a string
	// literal array that matches the token database format (see token_database.h
	// for detailed info on the database entry format).
	//
	// NOTE(review): the per-section notes below describe only the bytes visible
	// here; the field meanings are assumptions to confirm in token_database.h.
	alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
	// 8 bytes: "TOKENS" plus two zero bytes (presumably magic and version).
	"TOKENS\0\0"
	// Little-endian 4; presumably the entry count (4 records follow below).
	"\x04\x00\x00\x00"
	// Four zero bytes (presumably reserved or padding).
	"\0\0\0\0"
	// Four 8-byte records: 4 bytes (presumably a little-endian token value)
	// followed by the 4-byte filler "----".
	"\x01\x00\x00\x00----"
	"\x05\x00\x00\x00----"
	"\xFF\x00\x00\x00----"
	"\xFF\xEE\xEE\xDD----"
	// Four strings, presumably one per record above. The first three carry an
	// explicit NUL; the last is ended by the literal's implicit terminator.
	"One\0"
	"TWO\0"
	"333\0"
	"FOUR";

	} // namespace

	extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
	static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());

	FuzzedDataProvider provider(data, size);

	while (provider.remaining_bytes() != 0) {
	// Map the first word of the remaining fuzz data to a buffer argument
	// type, and feed the Detokenizer with a random length buffer to be
	// detokenized in the relevant format. The detokenized string returned
	// is itself of little consequence to this test.
	switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) {
	case kSpan: {
	size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
	kFuzzRangeMin, kFuzzRangeMax);
	std::vector<uint8_t> buffer =
	provider.ConsumeBytes<uint8_t>(consumed_size);
	auto detokenized_string =
	detokenizer.Detokenize(std::span(&buffer[0], buffer.size()));
	PW_UNUSED(detokenized_string);
	break;
	}

	case kStringView: {
	std::string str =
	provider.ConsumeRandomLengthString(provider.remaining_bytes());
	auto detokenized_string = detokenizer.Detokenize(str);
	PW_UNUSED(detokenized_string);
	break;
	}

	case kPtrAndLength: {
	size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
	kFuzzRangeMin, kFuzzRangeMax);
	std::vector<uint8_t> buffer =
	provider.ConsumeBytes<uint8_t>(consumed_size);
	auto detokenized_string =
	detokenizer.Detokenize(&buffer[0], buffer.size());
	PW_UNUSED(detokenized_string);
	break;
	}
	}
	}

	return 0;
	}

	} // namespace pw::tokenizer