libhfuzz+linux/bfd: unify static and dynamic (cmp) dictionaries into one
diff --git a/cmdline.c b/cmdline.c
index 1f6c6da..797049b 100644
--- a/cmdline.c
+++ b/cmdline.c
@@ -401,7 +401,6 @@
.covFeedbackFd = -1,
.cmpFeedbackMap = NULL,
.cmpFeedbackFd = -1,
- .cmpFeedback = true,
.blocklistFile = NULL,
.blocklist = NULL,
.blocklistCnt = 0,
@@ -531,7 +530,6 @@
{ { "netdriver", no_argument, NULL, 0x10C }, "Use netdriver (libhfnetdriver/). In most cases it will be autodetected through a binary signature" },
{ { "only_printable", no_argument, NULL, 0x10D }, "Only generate printable inputs" },
{ { "export_feedback", no_argument, NULL, 0x10E }, "Export the coverage feedback structure as ./hfuzz-feedback" },
- { { "const_feedback", required_argument, NULL, 0x112 }, "Use constant integer/string values from fuzzed programs to mangle input files via a dynamic dictionary (default: true)" },
{ { "pin_thread_cpu", required_argument, NULL, 0x114 }, "Pin a single execution thread to this many consecutive CPUs (default: 0 = no CPU pinning)" },
{ { "dynamic_input", required_argument, NULL, 0x115 }, "Path to a directory containing the dynamic file corpus" },
{ { "statsfile", required_argument, NULL, 0x116 }, "Stats file" },
@@ -666,9 +664,6 @@
case 0x10E:
hfuzz->io.exportFeedback = true;
break;
- case 0x112:
- hfuzz->feedback.cmpFeedback = cmdlineParseTrueFalse(opts[opt_index].name, optarg);
- break;
case 'z':
hfuzz->feedback.dynFileMethod |= _HF_DYNFILE_SOFT;
break;
diff --git a/honggfuzz.c b/honggfuzz.c
index 25974f2..01713bd 100644
--- a/honggfuzz.c
+++ b/honggfuzz.c
@@ -428,13 +428,32 @@
LOG_F("files_mapSharedMem(name='hf-covfeddback', sz=%zu, dir='%s') failed",
sizeof(feedback_t), hfuzz.io.workDir);
}
- if (hfuzz.feedback.cmpFeedback) {
- if (!(hfuzz.feedback.cmpFeedbackMap = files_mapSharedMem(sizeof(cmpfeedback_t),
- &hfuzz.feedback.cmpFeedbackFd, "hf-cmpfeedback", /* nocore= */ true,
- /* export= */ hfuzz.io.exportFeedback))) {
- LOG_F("files_mapSharedMem(name='hf-cmpfeedback', sz=%zu, dir='%s') failed",
- sizeof(cmpfeedback_t), hfuzz.io.workDir);
+#if defined(_HF_ARCH_LINUX) && !defined(_HF_LINUX_NO_BFD)
+ arch_bfdExtractRodataStrArrays(&hfuzz);
+#endif
+ if (!(hfuzz.feedback.cmpFeedbackMap = files_mapSharedMem(sizeof(fuzz_data_t),
+ &hfuzz.feedback.cmpFeedbackFd, "hf-cmpfeedback", /* nocore= */ true,
+ /* export= */ hfuzz.io.exportFeedback))) {
+ LOG_F("files_mapSharedMem(name='hf-cmpfeedback', sz=%zu, dir='%s') failed",
+ sizeof(fuzz_data_t), hfuzz.io.workDir);
+ }
+ if (hfuzz.feedback.cmpFeedbackMap) {
+#if defined(_HF_ARCH_LINUX) && !defined(_HF_LINUX_NO_BFD)
+ arch_elfCollectRoValues(&hfuzz);
+#endif
+ for (size_t i = 0;
+ i < hfuzz.mutate.dictionaryCnt && i < ARRAYSIZE(hfuzz.feedback.cmpFeedbackMap->dict);
+ i++) {
+ size_t len = hfuzz.mutate.dictionary[i].len;
+ if (len > sizeof(hfuzz.feedback.cmpFeedbackMap->dict[i].val)) {
+ len = sizeof(hfuzz.feedback.cmpFeedbackMap->dict[i].val);
+ }
+ memcpy(hfuzz.feedback.cmpFeedbackMap->dict[i].val, hfuzz.mutate.dictionary[i].val, len);
+ hfuzz.feedback.cmpFeedbackMap->dict[i].len = len;
}
+ hfuzz.feedback.cmpFeedbackMap->dictCnt =
+ HF_MIN(hfuzz.mutate.dictionaryCnt, ARRAYSIZE(hfuzz.feedback.cmpFeedbackMap->dict));
+ hfuzz.feedback.cmpFeedbackMap->dictStaticCnt = hfuzz.feedback.cmpFeedbackMap->dictCnt;
}
/* Stats file. */
if (hfuzz.io.statsFileName) {
diff --git a/honggfuzz.h b/honggfuzz.h
index ebc37bb..3818900 100644
--- a/honggfuzz.h
+++ b/honggfuzz.h
@@ -219,12 +219,19 @@
} feedback_t;
typedef struct {
- struct {
- uint8_t val[64];
- uint32_t len;
- } valArr[1024 * 16];
- uint32_t cnt;
-} cmpfeedback_t;
+ uint8_t val[64];
+ uint32_t len;
+} dict_entry_t;
+
+typedef struct {
+ dict_entry_t dict[1024 * 32];
+ uint32_t dictCnt;
+ uint32_t dictStaticCnt;
+ uint32_t ro32[1024 * 128];
+ uint32_t ro32Cnt;
+ uint64_t ro64[1024 * 128];
+ uint32_t ro64Cnt;
+} fuzz_data_t;
typedef struct {
struct {
@@ -330,9 +337,8 @@
fuzzState_t state;
feedback_t* covFeedbackMap;
int covFeedbackFd;
- cmpfeedback_t* cmpFeedbackMap;
+ fuzz_data_t* cmpFeedbackMap;
int cmpFeedbackFd;
- bool cmpFeedback;
const char* blocklistFile;
uint64_t* blocklist;
size_t blocklistCnt;
diff --git a/libhfcommon/util.c b/libhfcommon/util.c
index cf14ff1..fa44218 100644
--- a/libhfcommon/util.c
+++ b/libhfcommon/util.c
@@ -24,7 +24,6 @@
#include "util.h"
#include <ctype.h>
-#include <elf.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
@@ -977,257 +976,9 @@
return (lhfc_addr_t)dl_iterate_phdr(addrStatic_cb, (void*)addr);
}
-/* Collected values from read-only sections */
-static uint32_t* roVals32 = NULL;
-static size_t roVals32_cnt = 0;
-static size_t roVals32_cap = 0;
-static uint64_t* roVals64 = NULL;
-static size_t roVals64_cnt = 0;
-static size_t roVals64_cap = 0;
-
-static int cmp_u32(const void* a, const void* b) {
- uint32_t va = *(const uint32_t*)a;
- uint32_t vb = *(const uint32_t*)b;
- if (va < vb) return -1;
- if (va > vb) return 1;
- return 0;
-}
-
-static int cmp_u64(const void* a, const void* b) {
- uint64_t va = *(const uint64_t*)a;
- uint64_t vb = *(const uint64_t*)b;
- if (va < vb) return -1;
- if (va > vb) return 1;
- return 0;
-}
-
-static bool util_isInterestingSection(const char* name) {
- if (strcmp(name, ".rodata") == 0) return true;
- if (strcmp(name, ".text") == 0) return true;
- if (strcmp(name, ".data") == 0) return true;
- if (strcmp(name, ".data.rel.ro") == 0) return true;
- if (strncmp(name, ".rodata.", 8) == 0) return true;
- if (strncmp(name, ".data.rel.ro.", 13) == 0) return true;
- return false;
-}
-
-static void util_add32(uint32_t v) {
- if (roVals32_cnt >= roVals32_cap) {
- roVals32_cap = roVals32_cap ? roVals32_cap * 2 : 1024;
- roVals32 = util_Realloc(roVals32, roVals32_cap * sizeof(uint32_t));
- }
- roVals32[roVals32_cnt++] = v;
-}
-
-static void util_add64(uint64_t v) {
- if (roVals64_cnt >= roVals64_cap) {
- roVals64_cap = roVals64_cap ? roVals64_cap * 2 : 1024;
- roVals64 = util_Realloc(roVals64, roVals64_cap * sizeof(uint64_t));
- }
- roVals64[roVals64_cnt++] = v;
-}
-
-static void util_analyzeSection(const char* name, const uint8_t* p, size_t sz) {
- LOG_D("Analyzing section: '%s' (size: %zu) for integer values", name, sz);
- for (size_t off = 0; off + sizeof(uint32_t) <= sz; off += sizeof(uint32_t)) {
- uint32_t v;
- memcpy(&v, p + off, sizeof(v));
- util_add32(v);
- }
- for (size_t off = 0; off + sizeof(uint64_t) <= sz; off += sizeof(uint64_t)) {
- uint64_t v;
- memcpy(&v, p + off, sizeof(v));
- util_add64(v);
- }
-}
-
-static void util_collectELF64(const uint8_t* map, size_t sz) {
- const Elf64_Ehdr* ehdr = (const Elf64_Ehdr*)map;
- if ((uint64_t)sz < ehdr->e_shoff + (ehdr->e_shentsize * ehdr->e_shnum)) return;
-
- const Elf64_Shdr* shdr = (const Elf64_Shdr*)(map + ehdr->e_shoff);
- if (ehdr->e_shstrndx >= ehdr->e_shnum) return;
-
- const char* strtab = (const char*)(map + shdr[ehdr->e_shstrndx].sh_offset);
- if ((const uint8_t*)strtab >= map + sz) return;
-
- for (int i = 0; i < ehdr->e_shnum; i++) {
- if (shdr[i].sh_offset + shdr[i].sh_size > (uint64_t)sz) continue;
- const char* name = strtab + shdr[i].sh_name;
- if (util_isInterestingSection(name)) {
- util_analyzeSection(name, map + shdr[i].sh_offset, shdr[i].sh_size);
- }
- }
-}
-
-static void util_collectELF32(const uint8_t* map, size_t sz) {
- const Elf32_Ehdr* ehdr = (const Elf32_Ehdr*)map;
- if ((uint64_t)sz < ehdr->e_shoff + (ehdr->e_shentsize * ehdr->e_shnum)) return;
-
- const Elf32_Shdr* shdr = (const Elf32_Shdr*)(map + ehdr->e_shoff);
- if (ehdr->e_shstrndx >= ehdr->e_shnum) return;
-
- const char* strtab = (const char*)(map + shdr[ehdr->e_shstrndx].sh_offset);
- if ((const uint8_t*)strtab >= map + sz) return;
-
- for (int i = 0; i < ehdr->e_shnum; i++) {
- if (shdr[i].sh_offset + shdr[i].sh_size > (uint64_t)sz) continue;
- const char* name = strtab + shdr[i].sh_name;
- if (util_isInterestingSection(name)) {
- util_analyzeSection(name, map + shdr[i].sh_offset, shdr[i].sh_size);
- }
- }
-}
-
-static void collectRoValues(void) {
- const char* fname = "/proc/self/exe";
-#if defined(__FreeBSD__) || defined(__DragonFly__)
- fname = "/proc/curproc/file";
-#elif defined(_HF_ARCH_NETBSD)
- fname = "/proc/curproc/exe";
-#elif defined(__sun)
- fname = "/proc/self/path/a.out";
-#endif
-
- int fd = TEMP_FAILURE_RETRY(open(fname, O_RDONLY | O_CLOEXEC));
- if (fd == -1) {
- LOG_W("open('%s', O_RDONLY|O_CLOEXEC)", fname);
- return;
- }
- LOG_D("Opening file for RO value collection: %s", fname);
-
- struct stat st;
- if (fstat(fd, &st) == -1) {
- LOG_W("fstat('%s', fd=%d)", fname, fd);
- close(fd);
- return;
- }
- if ((size_t)st.st_size < sizeof(Elf32_Ehdr)) {
- close(fd);
- return;
- }
-
- const uint8_t* map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
- if (map == MAP_FAILED) {
- close(fd);
- return;
- }
-
- if (map[EI_CLASS] == ELFCLASS64) {
- util_collectELF64(map, st.st_size);
- } else if (map[EI_CLASS] == ELFCLASS32) {
- util_collectELF32(map, st.st_size);
- }
-
- munmap((void*)map, st.st_size);
- close(fd);
-
- /* Sort arrays */
- if (roVals32_cnt > 1) {
- qsort(roVals32, roVals32_cnt, sizeof(uint32_t), cmp_u32);
- }
- if (roVals64_cnt > 1) {
- qsort(roVals64, roVals64_cnt, sizeof(uint64_t), cmp_u64);
- }
-
- /* Deduplicate 32-bit values in-place */
- if (roVals32_cnt > 1) {
- size_t w = 1;
- for (size_t r = 1; r < roVals32_cnt; r++) {
- if (roVals32[r] != roVals32[w - 1]) {
- roVals32[w++] = roVals32[r];
- }
- }
- roVals32_cnt = w;
- }
-
- /* Deduplicate 64-bit values in-place */
- if (roVals64_cnt > 1) {
- size_t w = 1;
- for (size_t r = 1; r < roVals64_cnt; r++) {
- if (roVals64[r] != roVals64[w - 1]) {
- roVals64[w++] = roVals64[r];
- }
- }
- roVals64_cnt = w;
- }
-
- LOG_I("Parsed %s: found %zu 32-bit and %zu 64-bit interesting values", fname, roVals32_cnt,
- roVals64_cnt);
-
- /* Shrink to actual size */
- if (roVals32_cnt > 0) {
- roVals32 = util_Realloc(roVals32, roVals32_cnt * sizeof(uint32_t));
- } else {
- free(roVals32);
- roVals32 = NULL;
- }
- if (roVals64_cnt > 0) {
- roVals64 = util_Realloc(roVals64, roVals64_cnt * sizeof(uint64_t));
- } else {
- free(roVals64);
- roVals64 = NULL;
- }
-}
-
-static pthread_once_t roValsInitOnce = PTHREAD_ONCE_INIT;
-
-bool util_32bitValInBinary(uint32_t v) {
- pthread_once(&roValsInitOnce, collectRoValues);
-
- if (roVals32_cnt == 0) {
- return false;
- }
-
- /* Binary search */
- size_t lo = 0, hi = roVals32_cnt;
- while (lo < hi) {
- size_t mid = lo + (hi - lo) / 2;
- if (roVals32[mid] < v) {
- lo = mid + 1;
- } else {
- hi = mid;
- }
- }
- return (lo < roVals32_cnt && roVals32[lo] == v);
-}
-
-bool util_64bitValInBinary(uint64_t v) {
- pthread_once(&roValsInitOnce, collectRoValues);
-
- if (roVals64_cnt == 0) {
- return false;
- }
-
- /* Binary search */
- size_t lo = 0, hi = roVals64_cnt;
- while (lo < hi) {
- size_t mid = lo + (hi - lo) / 2;
- if (roVals64[mid] < v) {
- lo = mid + 1;
- } else {
- hi = mid;
- }
- }
- return (lo < roVals64_cnt && roVals64[lo] == v);
-}
-
-bool util_16bitValInBinary(uint16_t v HF_ATTR_UNUSED) {
- /* 16-bit values are too common to be useful for dictionary extraction */
- return false;
-}
#else /* !defined(_HF_ARCH_DARWIN) && !defined(__CYGWIN__) */
/* Darwin doesn't use ELF file format for binaries, so dl_iterate_phdr() cannot be used there */
lhfc_addr_t util_getProgAddr(const void* addr HF_ATTR_UNUSED) {
return LHFC_ADDR_NOTFOUND;
}
-bool util_16bitValInBinary(uint16_t v HF_ATTR_UNUSED) {
- return false;
-}
-bool util_32bitValInBinary(uint32_t v HF_ATTR_UNUSED) {
- return false;
-}
-bool util_64bitValInBinary(uint64_t v HF_ATTR_UNUSED) {
- return false;
-}
#endif /* !defined(_HF_ARCH_DARWIN) && !defined(__CYGWIN__) */
diff --git a/libhfcommon/util.h b/libhfcommon/util.h
index a7aeecc..ed62bf8 100644
--- a/libhfcommon/util.h
+++ b/libhfcommon/util.h
@@ -220,9 +220,6 @@
extern void util_closeStdio(bool close_stdin, bool close_stdout, bool close_stderr);
extern lhfc_addr_t util_getProgAddr(const void* addr);
-extern bool util_16bitValInBinary(uint16_t v);
-extern bool util_32bitValInBinary(uint32_t v);
-extern bool util_64bitValInBinary(uint64_t v);
extern uint64_t util_hash(const char* buf, size_t len);
extern int64_t fastArray64Search(uint64_t* array, size_t arraySz, uint64_t key);
diff --git a/libhfuzz/instrument.c b/libhfuzz/instrument.c
index ef52d4e..eace1d0 100644
--- a/libhfuzz/instrument.c
+++ b/libhfuzz/instrument.c
@@ -46,9 +46,9 @@
*/
static feedback_t bbMapFb;
-feedback_t* globalCovFeedback = &bbMapFb;
-feedback_t* localCovFeedback = &bbMapFb;
-cmpfeedback_t* globalCmpFeedback = NULL;
+feedback_t* globalCovFeedback = &bbMapFb;
+feedback_t* localCovFeedback = &bbMapFb;
+fuzz_data_t* globalCmpFeedback = NULL;
uint32_t my_thread_no = 0;
@@ -158,17 +158,17 @@
if (fstat(_HF_CMP_BITMAP_FD, &st) == -1) {
return;
}
- if (st.st_size != sizeof(cmpfeedback_t)) {
+ if (st.st_size != sizeof(fuzz_data_t)) {
LOG_W(
- "Size of the globalCmpFeedback structure mismatch: st.size != sizeof(cmpfeedback_t) "
+ "Size of the globalCmpFeedback structure mismatch: st.size != sizeof(fuzz_data_t) "
"(%zu != %zu). Link your fuzzed binaries with the newest honggfuzz and hfuzz-clang(++)",
- (size_t)st.st_size, sizeof(cmpfeedback_t));
+ (size_t)st.st_size, sizeof(fuzz_data_t));
return;
}
- void* ret = initializeTryMapHugeTLB(_HF_CMP_BITMAP_FD, sizeof(cmpfeedback_t));
+ void* ret = initializeTryMapHugeTLB(_HF_CMP_BITMAP_FD, sizeof(fuzz_data_t));
if (ret == MAP_FAILED) {
PLOG_W("mmap(_HF_CMP_BITMAP_FD=%d, size=%zu) of the feedback structure failed",
- _HF_CMP_BITMAP_FD, sizeof(cmpfeedback_t));
+ _HF_CMP_BITMAP_FD, sizeof(fuzz_data_t));
return;
}
ATOMIC_SET(globalCmpFeedback, ret);
@@ -307,29 +307,50 @@
if (len <= 1) {
return;
}
- if (len > sizeof(globalCmpFeedback->valArr[0].val)) {
- len = sizeof(globalCmpFeedback->valArr[0].val);
+ if (len > sizeof(globalCmpFeedback->dict[0].val)) {
+ len = sizeof(globalCmpFeedback->dict[0].val);
}
- const uint32_t arrSize = ARRAYSIZE(globalCmpFeedback->valArr);
- uint32_t curroff = ATOMIC_GET(globalCmpFeedback->cnt);
+ const uint32_t arrSize = ARRAYSIZE(globalCmpFeedback->dict);
+ const uint32_t staticCnt = globalCmpFeedback->dictStaticCnt;
+ uint32_t curroff = ATOMIC_GET(globalCmpFeedback->dictCnt);
- uint32_t scanLimit = (curroff < arrSize) ? curroff : arrSize;
- uint32_t scanStart = (curroff > scanLimit) ? (curroff - scanLimit) : 0;
+ uint32_t checkCnt = 16384;
+ uint32_t scanLimit = (curroff < checkCnt) ? curroff : checkCnt;
+ uint32_t scanStart = curroff - scanLimit;
+
for (uint32_t i = scanStart; i < curroff; i++) {
- uint32_t idx = i % arrSize;
- if ((len == ATOMIC_GET(globalCmpFeedback->valArr[idx].len)) &&
- hf_memcmp(globalCmpFeedback->valArr[idx].val, mem, len) == 0) {
+ uint32_t idx;
+ if (i < arrSize) {
+ idx = i;
+ } else {
+ uint32_t dynSz = arrSize - staticCnt;
+ if (dynSz == 0)
+ idx = 0;
+ else
+ idx = staticCnt + ((i - arrSize) % dynSz);
+ }
+
+ if ((len == ATOMIC_GET(globalCmpFeedback->dict[idx].len)) &&
+ hf_memcmp(globalCmpFeedback->dict[idx].val, mem, len) == 0) {
return;
}
}
- /* Ring buffer: wrap around when full */
- uint32_t newoff = ATOMIC_POST_INC(globalCmpFeedback->cnt);
- uint32_t idx = newoff % arrSize;
+ uint32_t newoff = ATOMIC_POST_INC(globalCmpFeedback->dictCnt);
+ uint32_t idx;
+ if (newoff < arrSize) {
+ idx = newoff;
+ } else {
+ uint32_t dynSz = arrSize - staticCnt;
+ if (dynSz == 0)
+ idx = 0;
+ else
+ idx = staticCnt + ((newoff - arrSize) % dynSz);
+ }
- memcpy(globalCmpFeedback->valArr[idx].val, mem, len);
- ATOMIC_SET(globalCmpFeedback->valArr[idx].len, len);
+ memcpy(globalCmpFeedback->dict[idx].val, mem, len);
+ ATOMIC_SET(globalCmpFeedback->dict[idx].len, len);
}
/*
@@ -452,17 +473,49 @@
return true;
}
+static bool instrument32bitValInBinary(uint32_t v) {
+ if (!globalCmpFeedback || globalCmpFeedback->ro32Cnt == 0) {
+ return false;
+ }
+ size_t lo = 0, hi = globalCmpFeedback->ro32Cnt;
+ while (lo < hi) {
+ size_t mid = lo + (hi - lo) / 2;
+ if (globalCmpFeedback->ro32[mid] < v) {
+ lo = mid + 1;
+ } else {
+ hi = mid;
+ }
+ }
+ return (lo < globalCmpFeedback->ro32Cnt && globalCmpFeedback->ro32[lo] == v);
+}
+
+static bool instrument64bitValInBinary(uint64_t v) {
+ if (!globalCmpFeedback || globalCmpFeedback->ro64Cnt == 0) {
+ return false;
+ }
+ size_t lo = 0, hi = globalCmpFeedback->ro64Cnt;
+ while (lo < hi) {
+ size_t mid = lo + (hi - lo) / 2;
+ if (globalCmpFeedback->ro64[mid] < v) {
+ lo = mid + 1;
+ } else {
+ hi = mid;
+ }
+ }
+ return (lo < globalCmpFeedback->ro64Cnt && globalCmpFeedback->ro64[lo] == v);
+}
+
void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) {
/* Add 4byte values to the const_dictionary if they exist within the binary */
if (globalCmpFeedback) {
if (instrumentLimitEvery(16383)) {
if (instrumentValueInteresting(Arg1)) {
- if (util_32bitValInBinary(Arg1)) {
+ if (instrument32bitValInBinary(Arg1)) {
instrumentAddConstMemInternal(&Arg1, sizeof(Arg1));
}
}
if (instrumentValueInteresting(Arg2)) {
- if (util_32bitValInBinary(Arg2)) {
+ if (instrument32bitValInBinary(Arg2)) {
instrumentAddConstMemInternal(&Arg2, sizeof(Arg2));
}
}
@@ -477,12 +530,12 @@
if (globalCmpFeedback) {
if (instrumentLimitEvery(16383)) {
if (instrumentValueInteresting(Arg1)) {
- if (util_64bitValInBinary(Arg1)) {
+ if (instrument64bitValInBinary(Arg1)) {
instrumentAddConstMemInternal(&Arg1, sizeof(Arg1));
}
}
if (instrumentValueInteresting(Arg2)) {
- if (util_64bitValInBinary(Arg2)) {
+ if (instrument64bitValInBinary(Arg2)) {
instrumentAddConstMemInternal(&Arg2, sizeof(Arg2));
}
}
diff --git a/linux/bfd.c b/linux/bfd.c
index 70b07eb..1ad3ec3 100644
--- a/linux/bfd.c
+++ b/linux/bfd.c
@@ -746,4 +746,141 @@
return total_cnt;
}
+static int arch_cmp_u32(const void* a, const void* b) {
+ uint32_t va = *(const uint32_t*)a;
+ uint32_t vb = *(const uint32_t*)b;
+ if (va < vb) return -1;
+ if (va > vb) return 1;
+ return 0;
+}
+
+static int arch_cmp_u64(const void* a, const void* b) {
+ uint64_t va = *(const uint64_t*)a;
+ uint64_t vb = *(const uint64_t*)b;
+ if (va < vb) return -1;
+ if (va > vb) return 1;
+ return 0;
+}
+
+static bool arch_isInterestingSection(const char* name) {
+ if (strcmp(name, ".rodata") == 0) return true;
+ if (strcmp(name, ".data") == 0) return true;
+ if (strcmp(name, ".data.rel.ro") == 0) return true;
+ if (strncmp(name, ".rodata.", 8) == 0) return true;
+ if (strncmp(name, ".data.rel.ro.", 13) == 0) return true;
+ /*
+ * .text is too random and unaligned
+ * if (strcmp(name, ".text") == 0) return true;
+ */
+ return false;
+}
+
+static void arch_analyzeSection(honggfuzz_t* hfuzz, const char* name, const uint8_t* p, size_t sz) {
+ LOG_D("Analyzing section: '%s' (size: %zu) for integer values", name, sz);
+
+ fuzz_data_t* fb = hfuzz->feedback.cmpFeedbackMap;
+
+ for (size_t off = 0; off + sizeof(uint32_t) <= sz; off += sizeof(uint32_t)) {
+ uint32_t v;
+ memcpy(&v, p + off, sizeof(v));
+ if (fb->ro32Cnt < ARRAYSIZE(fb->ro32)) {
+ fb->ro32[fb->ro32Cnt++] = v;
+ }
+ }
+ for (size_t off = 0; off + sizeof(uint64_t) <= sz; off += sizeof(uint64_t)) {
+ uint64_t v;
+ memcpy(&v, p + off, sizeof(v));
+ if (fb->ro64Cnt < ARRAYSIZE(fb->ro64)) {
+ fb->ro64[fb->ro64Cnt++] = v;
+ }
+ }
+}
+
+void arch_elfCollectRoValues(honggfuzz_t* hfuzz) {
+ if (!hfuzz->feedback.cmpFeedbackMap) {
+ return;
+ }
+
+ MX_SCOPED_LOCK(&arch_bfd_mutex);
+
+ const char* fname = hfuzz->exe.cmdline[0];
+ bfd_init();
+ bfd* bfdh = bfd_openr(fname, NULL);
+ if (!bfdh) {
+ LOG_W("bfd_openr('%s') failed", fname);
+ return;
+ }
+
+ if (!bfd_check_format(bfdh, bfd_object)) {
+ LOG_W("bfd_check_format('%s') failed", fname);
+ bfd_close(bfdh);
+ return;
+ }
+
+ for (struct bfd_section* sec = bfdh->sections; sec; sec = sec->next) {
+ const char* name = bfd_section_name(sec);
+ if (!arch_isInterestingSection(name)) {
+ continue;
+ }
+
+ bfd_size_type sz = bfd_section_size(sec);
+ if (sz == 0) {
+ continue;
+ }
+
+ if (sz > 1024 * 1024 * 1024) { /* 1GiB */
+ LOG_W("Section '%s' size (%" PRIu64 ") is too large, skipping", name, (uint64_t)sz);
+ continue;
+ }
+
+ uint8_t* buf = util_Malloc(sz);
+ defer {
+ free(buf);
+ };
+ if (!bfd_get_section_contents(bfdh, sec, buf, 0, sz)) {
+ LOG_W("bfd_get_section_contents('%s') failed", name);
+ continue;
+ }
+
+ arch_analyzeSection(hfuzz, name, buf, sz);
+ }
+
+ bfd_close(bfdh);
+
+ fuzz_data_t* fb = hfuzz->feedback.cmpFeedbackMap;
+
+ /* Sort arrays */
+ if (fb->ro32Cnt > 1) {
+ qsort(fb->ro32, fb->ro32Cnt, sizeof(uint32_t), arch_cmp_u32);
+ }
+ if (fb->ro64Cnt > 1) {
+ qsort(fb->ro64, fb->ro64Cnt, sizeof(uint64_t), arch_cmp_u64);
+ }
+
+ /* Deduplicate 32-bit values in-place */
+ if (fb->ro32Cnt > 1) {
+ size_t w = 1;
+ for (size_t r = 1; r < fb->ro32Cnt; r++) {
+ if (fb->ro32[r] != fb->ro32[w - 1]) {
+ fb->ro32[w++] = fb->ro32[r];
+ }
+ }
+ fb->ro32Cnt = w;
+ }
+
+ /* Deduplicate 64-bit values in-place */
+ if (fb->ro64Cnt > 1) {
+ size_t w = 1;
+ for (size_t r = 1; r < fb->ro64Cnt; r++) {
+ if (fb->ro64[r] != fb->ro64[w - 1]) {
+ fb->ro64[w++] = fb->ro64[r];
+ }
+ }
+ fb->ro64Cnt = w;
+ }
+
+ LOG_I("Parsed %s: found %u 32-bit and %u 64-bit interesting values", fname, fb->ro32Cnt,
+ fb->ro64Cnt);
+}
+
#endif /* !defined(_HF_LINUX_NO_BFD) */
diff --git a/linux/bfd.h b/linux/bfd.h
index 9ac8d19..e246591 100644
--- a/linux/bfd.h
+++ b/linux/bfd.h
@@ -45,6 +45,7 @@
extern void arch_bfdDisasm(pid_t pid, uint8_t* mem, size_t size, char* instr);
extern size_t arch_bfdExtractStrArray(honggfuzz_t* hfuzz, const char* symName);
extern size_t arch_bfdExtractRodataStrArrays(honggfuzz_t* hfuzz);
+extern void arch_elfCollectRoValues(honggfuzz_t* hfuzz);
#endif /* !defined(_HF_LINUX_NO_BFD) */
diff --git a/mangle.c b/mangle.c
index 76cd887..ebc71d9 100644
--- a/mangle.c
+++ b/mangle.c
@@ -532,37 +532,22 @@
mangle_UseValue(run, mangleMagicVals[choice].val, mangleMagicVals[choice].size, printable);
}
-static void mangle_StaticDict(run_t* run, bool printable) {
- if (run->global->mutate.dictionaryCnt == 0) {
- mangle_Bytes(run, printable);
- return;
- }
- uint64_t choice = util_rndGet(0, run->global->mutate.dictionaryCnt - 1);
- mangle_UseValue(run, run->global->mutate.dictionary[choice].val,
- run->global->mutate.dictionary[choice].len, printable);
-}
-
static inline const uint8_t* mangle_FeedbackDict(run_t* run, size_t* len) {
- if (!run->global->feedback.cmpFeedback) {
- return NULL;
+ fuzz_data_t* cmpf = run->global->feedback.cmpFeedbackMap;
+ uint32_t cnt = ATOMIC_GET(cmpf->dictCnt);
+ if (cnt > 0) {
+ uint32_t max_idx = HF_MIN(cnt, ARRAYSIZE(cmpf->dict));
+ uint32_t choice = util_rndGet(0, max_idx - 1);
+ *len = (size_t)ATOMIC_GET(cmpf->dict[choice].len);
+ if (*len > 0) {
+ return cmpf->dict[choice].val;
+ }
}
- cmpfeedback_t* cmpf = run->global->feedback.cmpFeedbackMap;
- uint32_t cnt = ATOMIC_GET(cmpf->cnt);
- if (cnt == 0) {
- return NULL;
- }
- if (cnt > ARRAYSIZE(cmpf->valArr)) {
- cnt = ARRAYSIZE(cmpf->valArr);
- }
- uint32_t choice = util_rndGet(0, cnt - 1);
- *len = (size_t)ATOMIC_GET(cmpf->valArr[choice].len);
- if (*len == 0) {
- return NULL;
- }
- return cmpf->valArr[choice].val;
+
+ return NULL;
}
-static void mangle_ConstFeedbackDict(run_t* run, bool printable) {
+static void mangle_StaticDict(run_t* run, bool printable) {
size_t len;
const uint8_t* val = mangle_FeedbackDict(run, &len);
if (val == NULL) {
@@ -596,6 +581,10 @@
mangle_UseValue(run, val, len, printable);
}
+static void mangle_ConstFeedbackDict(run_t* run, bool printable) {
+ mangle_StaticDict(run, printable);
+}
+
static void mangle_MemSet(run_t* run, bool printable) {
size_t off = mangle_getOffSet(run);
size_t len = mangle_getLen(run->dynfile->size - off);
@@ -1002,31 +991,20 @@
}
static void mangle_CmpSolve(run_t* run, bool printable) {
- if (!run->global->feedback.cmpFeedback) {
- mangle_ConstFeedbackDict(run, printable);
- return;
- }
-
- cmpfeedback_t* cmpf = run->global->feedback.cmpFeedbackMap;
- uint32_t cnt = ATOMIC_GET(cmpf->cnt);
- if (cnt == 0) {
+ size_t cmp_len;
+ const uint8_t* cmp_val_ptr = mangle_FeedbackDict(run, &cmp_len);
+ if (cmp_val_ptr == NULL) {
mangle_Magic(run, printable);
return;
}
- if (cnt > ARRAYSIZE(cmpf->valArr)) {
- cnt = ARRAYSIZE(cmpf->valArr);
- }
-
- uint32_t choice = util_rndGet(0, cnt - 1);
- size_t cmp_len = (size_t)ATOMIC_GET(cmpf->valArr[choice].len);
if (cmp_len == 0 || cmp_len > 32) {
mangle_Magic(run, printable);
return;
}
uint8_t cmp_val[32];
- memcpy(cmp_val, cmpf->valArr[choice].val, cmp_len);
+ memcpy(cmp_val, cmp_val_ptr, cmp_len);
/* Find partial match in input */
for (size_t off = 0; off + cmp_len <= run->dynfile->size; off++) {
@@ -1348,31 +1326,20 @@
* Gradient-guided CMP mutation - focus mutations on bytes that differ in comparisons
*/
static void mangle_GradientCmp(run_t* run, bool printable) {
- if (!run->global->feedback.cmpFeedback) {
+ size_t cmp_len;
+ const uint8_t* cmp_val_ptr = mangle_FeedbackDict(run, &cmp_len);
+ if (cmp_val_ptr == NULL) {
mangle_Bytes(run, printable);
return;
}
- cmpfeedback_t* cmpf = run->global->feedback.cmpFeedbackMap;
- uint32_t cnt = ATOMIC_GET(cmpf->cnt);
- if (cnt == 0) {
- mangle_Magic(run, printable);
- return;
- }
-
- if (cnt > ARRAYSIZE(cmpf->valArr)) {
- cnt = ARRAYSIZE(cmpf->valArr);
- }
-
- uint32_t choice = util_rndGet(0, cnt - 1);
- size_t cmp_len = (size_t)ATOMIC_GET(cmpf->valArr[choice].len);
if (cmp_len == 0 || cmp_len > 32) {
mangle_Magic(run, printable);
return;
}
uint8_t cmp_val[32];
- memcpy(cmp_val, cmpf->valArr[choice].val, cmp_len);
+ memcpy(cmp_val, cmp_val_ptr, cmp_len);
/* Find partial match and identify differing bytes */
for (size_t off = 0; off + cmp_len <= run->dynfile->size; off++) {
@@ -1436,24 +1403,13 @@
* Arithmetic mutations on discovered constants from CMP feedback
*/
static void mangle_ArithConst(run_t* run, bool printable) {
- if (!run->global->feedback.cmpFeedback) {
+ size_t val_len;
+ const uint8_t* val_ptr = mangle_FeedbackDict(run, &val_len);
+ if (val_ptr == NULL) {
mangle_AddSub(run, printable);
return;
}
- cmpfeedback_t* cmpf = run->global->feedback.cmpFeedbackMap;
- uint32_t cnt = ATOMIC_GET(cmpf->cnt);
- if (cnt == 0) {
- mangle_AddSub(run, printable);
- return;
- }
-
- if (cnt > ARRAYSIZE(cmpf->valArr)) {
- cnt = ARRAYSIZE(cmpf->valArr);
- }
-
- uint32_t choice = util_rndGet(0, cnt - 1);
- size_t val_len = (size_t)ATOMIC_GET(cmpf->valArr[choice].len);
if (val_len == 0 || val_len > 8) {
mangle_AddSub(run, printable);
return;
@@ -1462,7 +1418,7 @@
/* Extract value as integer */
uint64_t val = 0;
for (size_t i = 0; i < val_len; i++) {
- val |= ((uint64_t)cmpf->valArr[choice].val[i]) << (i * 8);
+ val |= ((uint64_t)val_ptr[i]) << (i * 8);
}
/* Apply arithmetic mutation */
@@ -1504,14 +1460,19 @@
}
static void mangle_DictionaryInsert(run_t* run, bool printable) {
- if (run->global->mutate.dictionaryCnt == 0) {
+ size_t len1;
+ const uint8_t* val1 = mangle_FeedbackDict(run, &len1);
+ if (val1 == NULL) {
mangle_Bytes(run, printable);
return;
}
- size_t cnt = run->global->mutate.dictionaryCnt;
- uint64_t c1 = util_rndGet(0, cnt - 1);
- uint64_t c2 = util_rndGet(0, cnt - 1);
+ size_t len2;
+ const uint8_t* val2 = mangle_FeedbackDict(run, &len2);
+ if (val2 == NULL) {
+ mangle_Bytes(run, printable);
+ return;
+ }
const char* separators[] = {
"", " ", "\t", "\n", "\r\n", ",", ";", ":", "=", "&", "|", "(", ")", ".", "\"", "'"};
@@ -1519,8 +1480,6 @@
const char* sep = separators[sep_idx];
size_t sep_len = strlen(sep);
- size_t len1 = run->global->mutate.dictionary[c1].len;
- size_t len2 = run->global->mutate.dictionary[c2].len;
size_t total_len = len1 + sep_len + len2;
uint8_t* buf = util_Malloc(total_len);
@@ -1528,9 +1487,9 @@
free(buf);
};
- memcpy(buf, run->global->mutate.dictionary[c1].val, len1);
+ memcpy(buf, val1, len1);
memcpy(buf + len1, sep, sep_len);
- memcpy(buf + len1 + sep_len, run->global->mutate.dictionary[c2].val, len2);
+ memcpy(buf + len1 + sep_len, val2, len2);
mangle_UseValue(run, buf, total_len, printable);
}
@@ -1723,7 +1682,6 @@
if (!need) return m;
if ((need & 1) && run->global->mutate.dictionaryCnt == 0) return reqs[m].fallback;
- if ((need & 2) && !run->global->feedback.cmpFeedback) return reqs[m].fallback;
if ((need & 4) && run->global->feedback.dynFileMethod == _HF_DYNFILE_NONE)
return reqs[m].fallback;
@@ -1841,7 +1799,6 @@
time_t stagnation = time(NULL) - ATOMIC_GET(run->global->timing.lastCovUpdate);
uint64_t base = run->mutationsPerRun;
- bool haveCmp = run->global->feedback.cmpFeedback;
run->mutationTiers = 0;
@@ -1867,7 +1824,7 @@
* If we are stuck, we want to try more specific strategies (dictionaries, splices)
*/
if (stagnation > timeStagnated) {
- if (haveCmp && util_rnd64() % 3 == 0) {
+ if (util_rnd64() % 3 == 0) {
run->mutationTiers |= (1 << TIER_DATA);
mangle_dispatch(run, MANGLE_CMP_SOLVE, printable);
}
@@ -1876,7 +1833,7 @@
mangle_dispatch(run, MANGLE_SPLICE, printable);
}
/* Try gradient-guided CMP mutations */
- if (haveCmp && util_rnd64() % 4 == 0) {
+ if (util_rnd64() % 4 == 0) {
run->mutationTiers |= (1 << TIER_DATA);
mangle_dispatch(run, MANGLE_GRADIENT_CMP, printable);
}
diff --git a/subproc.c b/subproc.c
index 3f57a20..1cbe65e 100644
--- a/subproc.c
+++ b/subproc.c
@@ -292,8 +292,7 @@
return false;
}
/* The const comparison bitmap/feedback structure */
- if (run->global->feedback.cmpFeedback &&
- TEMP_FAILURE_RETRY(dup2(run->global->feedback.cmpFeedbackFd, _HF_CMP_BITMAP_FD)) == -1) {
+ if (TEMP_FAILURE_RETRY(dup2(run->global->feedback.cmpFeedbackFd, _HF_CMP_BITMAP_FD)) == -1) {
PLOG_E("dup2(%d, _HF_CMP_BITMAP_FD=%d)", run->global->feedback.cmpFeedbackFd,
_HF_CMP_BITMAP_FD);
return false;