blob: 2c16830983930f7689443da17c6d6a3b1e01267c [file] [log] [blame]
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Allow dynamic symbol lookup in an in-memory Elf image.
//
#include "absl/debugging/internal/elf_mem_image.h"
#ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
#include <string.h>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/internal/raw_logging.h"
// From binutils/include/elf/common.h (this doesn't appear to be documented
// anywhere else).
//
// /* This flag appears in a Versym structure. It means that the symbol
// is hidden, and is only visible with an explicit version number.
// This is a GNU extension. */
// #define VERSYM_HIDDEN 0x8000
//
// /* This is the mask for the rest of the Versym information. */
// #define VERSYM_VERSION 0x7fff
#define VERSYM_VERSION 0x7fff
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace debugging_internal {
namespace {
#if __SIZEOF_POINTER__ == 4
const int kElfClass = ELFCLASS32;
int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); }
int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); }
#elif __SIZEOF_POINTER__ == 8
const int kElfClass = ELFCLASS64;
int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); }
int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); }
#else
const int kElfClass = -1;
int ElfBind(const ElfW(Sym) *) {
ABSL_RAW_LOG(FATAL, "Unexpected word size");
return 0;
}
int ElfType(const ElfW(Sym) *) {
ABSL_RAW_LOG(FATAL, "Unexpected word size");
return 0;
}
#endif
// Extract an element from one of the ELF tables, cast it to desired type.
// This is just a simple arithmetic and a glorified cast.
// Callers are responsible for bounds checking.
template <typename T>
const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
ElfW(Word) element_size, size_t index) {
return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
+ table_offset
+ index * element_size);
}
} // namespace
// The value of this variable doesn't matter; it's used only for its
// unique address.
const int ElfMemImage::kInvalidBaseSentinel = 0;
ElfMemImage::ElfMemImage(const void *base) {
ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
Init(base);
}
uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }
const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
return dynsym_ + index;
}
const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
return versym_ + index;
}
const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range");
return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize,
static_cast<size_t>(index));
}
const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
return dynstr_ + offset;
}
const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
// Symbol corresponds to "special" (e.g. SHN_ABS) section.
return reinterpret_cast<const void *>(sym->st_value);
}
ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
}
const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
"index out of range");
const ElfW(Verdef) *version_definition = verdef_;
while (version_definition->vd_ndx < index && version_definition->vd_next) {
const char *const version_definition_as_char =
reinterpret_cast<const char *>(version_definition);
version_definition =
reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
version_definition->vd_next);
}
return version_definition->vd_ndx == index ? version_definition : nullptr;
}
const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
const ElfW(Verdef) *verdef) const {
return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
}
const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
return dynstr_ + offset;
}
void ElfMemImage::Init(const void *base) {
ehdr_ = nullptr;
dynsym_ = nullptr;
dynstr_ = nullptr;
versym_ = nullptr;
verdef_ = nullptr;
num_syms_ = 0;
strsize_ = 0;
verdefnum_ = 0;
// Sentinel: PT_LOAD .p_vaddr can't possibly be this.
link_base_ = ~ElfW(Addr){0}; // NOLINT(readability/braces)
if (!base) {
return;
}
const char *const base_as_char = reinterpret_cast<const char *>(base);
if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
assert(false);
return;
}
int elf_class = base_as_char[EI_CLASS];
if (elf_class != kElfClass) {
assert(false);
return;
}
switch (base_as_char[EI_DATA]) {
case ELFDATA2LSB: {
#ifndef ABSL_IS_LITTLE_ENDIAN
assert(false);
return;
#endif
break;
}
case ELFDATA2MSB: {
#ifndef ABSL_IS_BIG_ENDIAN
assert(false);
return;
#endif
break;
}
default: {
assert(false);
return;
}
}
ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
const ElfW(Phdr) *dynamic_program_header = nullptr;
for (int i = 0; i < ehdr_->e_phnum; ++i) {
const ElfW(Phdr) *const program_header = GetPhdr(i);
switch (program_header->p_type) {
case PT_LOAD:
if (!~link_base_) {
link_base_ = program_header->p_vaddr;
}
break;
case PT_DYNAMIC:
dynamic_program_header = program_header;
break;
}
}
if (!~link_base_ || !dynamic_program_header) {
assert(false);
// Mark this image as not present. Can not recur infinitely.
Init(nullptr);
return;
}
ptrdiff_t relocation =
base_as_char - reinterpret_cast<const char *>(link_base_);
ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
uint32_t *sysv_hash = nullptr;
uint32_t *gnu_hash = nullptr;
for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
const auto value =
static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
switch (dynamic_entry->d_tag) {
case DT_HASH:
sysv_hash = reinterpret_cast<uint32_t *>(value);
break;
case DT_GNU_HASH:
gnu_hash = reinterpret_cast<uint32_t *>(value);
break;
case DT_SYMTAB:
dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
break;
case DT_STRTAB:
dynstr_ = reinterpret_cast<const char *>(value);
break;
case DT_VERSYM:
versym_ = reinterpret_cast<ElfW(Versym) *>(value);
break;
case DT_VERDEF:
verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
break;
case DT_VERDEFNUM:
verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
break;
case DT_STRSZ:
strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
break;
default:
// Unrecognized entries explicitly ignored.
break;
}
}
if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
!verdef_ || !verdefnum_ || !strsize_) {
assert(false); // invalid VDSO
// Mark this image as not present. Can not recur infinitely.
Init(nullptr);
return;
}
if (sysv_hash) {
num_syms_ = sysv_hash[1];
} else {
assert(gnu_hash);
// Compute the number of symbols for DT_GNU_HASH, which is specified by
// https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
uint32_t nbuckets = gnu_hash[0];
// The buckets array is located after the header (4 uint32) and the bloom
// filter (size_t array of gnu_hash[2] elements).
uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
// Find the chain of the last non-empty bucket.
uint32_t idx = 0;
for (uint32_t i = nbuckets; i > 0;) {
idx = buckets[--i];
if (idx != 0) break;
}
if (idx != 0) {
// Find the last element of the chain, which has an odd value.
// Add one to get the number of symbols.
uint32_t *chain = buckets + nbuckets - gnu_hash[1];
while (chain[idx++] % 2 == 0) {
}
}
num_syms_ = idx;
}
}
bool ElfMemImage::LookupSymbol(const char *name,
const char *version,
int type,
SymbolInfo *info_out) const {
for (const SymbolInfo& info : *this) {
if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
ElfType(info.symbol) == type) {
if (info_out) {
*info_out = info;
}
return true;
}
}
return false;
}
bool ElfMemImage::LookupSymbolByAddress(const void *address,
SymbolInfo *info_out) const {
for (const SymbolInfo& info : *this) {
const char *const symbol_start =
reinterpret_cast<const char *>(info.address);
const char *const symbol_end = symbol_start + info.symbol->st_size;
if (symbol_start <= address && address < symbol_end) {
if (info_out) {
// Client wants to know details for that symbol (the usual case).
if (ElfBind(info.symbol) == STB_GLOBAL) {
// Strong symbol; just return it.
*info_out = info;
return true;
} else {
// Weak or local. Record it, but keep looking for a strong one.
*info_out = info;
}
} else {
// Client only cares if there is an overlapping symbol.
return true;
}
}
}
return false;
}
ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
uint32_t index)
: index_(index), image_(image) {}
const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
return &info_;
}
const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
return info_;
}
bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
return this->image_ == rhs.image_ && this->index_ == rhs.index_;
}
bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
return !(*this == rhs);
}
ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
this->Update(1);
return *this;
}
ElfMemImage::SymbolIterator ElfMemImage::begin() const {
SymbolIterator it(this, 0);
it.Update(0);
return it;
}
ElfMemImage::SymbolIterator ElfMemImage::end() const {
return SymbolIterator(this, GetNumSymbols());
}
void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
if (!image->IsPresent()) {
return;
}
index_ += increment;
if (index_ >= image->GetNumSymbols()) {
index_ = image->GetNumSymbols();
return;
}
const ElfW(Sym) *symbol = image->GetDynsym(index_);
const ElfW(Versym) *version_symbol = image->GetVersym(index_);
ABSL_RAW_CHECK(symbol && version_symbol, "");
const char *const symbol_name = image->GetDynstr(symbol->st_name);
#if defined(__NetBSD__)
const int version_index = version_symbol->vs_vers & VERSYM_VERSION;
#else
const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
#endif
const ElfW(Verdef) *version_definition = nullptr;
const char *version_name = "";
if (symbol->st_shndx == SHN_UNDEF) {
// Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
// version_index could well be greater than verdefnum_, so calling
// GetVerdef(version_index) may trigger assertion.
} else {
version_definition = image->GetVerdef(version_index);
}
if (version_definition) {
// I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
// optional 2nd if the version has a parent.
ABSL_RAW_CHECK(
version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
"wrong number of entries");
const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
version_name = image->GetVerstr(version_aux->vda_name);
}
info_.name = symbol_name;
info_.version = version_name;
info_.address = image->GetSymAddr(symbol);
info_.symbol = symbol;
}
} // namespace debugging_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HAVE_ELF_MEM_IMAGE