scripts/tools/memory/memdf/collector/readelf.py - third_party/github/project-chip/connectedhomeip - Git at Google

 #
 # Copyright (c) 2021 Project CHIP Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 """Collect memory information using `readelf` and `nm`."""

 import io
 import re
 from typing import Dict, List

 import elftools.elf.constants  # type: ignore
 import memdf.name
 import memdf.util.subprocess
 import pandas as pd  # type: ignore
 from memdf.collector.util import simplify_source
 from memdf.df import DFs, SectionDF, SegmentDF, SymbolDF, SymbolSourceDF
 from memdf.util.config import Config, ConfigDescription

 # Configuration entry for the file name of the `nm` executable.
 NM_CONFIG: ConfigDescription = {
     'tool.nm': dict(
         help='File name of the nm executable',
         metavar='FILE',
         default='nm',
     ),
 }

 # Configuration entry for the file name of the `readelf` executable.
 READELF_CONFIG: ConfigDescription = {
     'tool.readelf': dict(
         help='File name of the readelf executable',
         metavar='FILE',
         default='readelf',
     ),
 }

 # Union of the two tables above, in the same order.
 CONFIG: ConfigDescription = {**NM_CONFIG, **READELF_CONFIG}


 def read_sources(config: Config, filename: str) -> SymbolSourceDF:
     """Build a symbol-to-compilation-unit table from `nm -l` output.

     Every output line that parses contributes one row of
     (address, kind, symbol, cu); lines that do not parse are skipped.
     A symbol printed without an address is recorded at address 0, and a
     symbol without a source location gets an empty `cu`.  An empty
     `SymbolSourceDF` is returned when the tool did not start or has no
     stdout.
     """
     # TBD: figure out how to get this via readelf.
     prefixes = config.get_re('collect.prefix')
     nm = memdf.util.subprocess.run_tool_pipe(config, ['nm', '-l', filename])
     if not (nm and nm.stdout):
         return SymbolSourceDF()

     nm_line = re.compile(
         r"""^((?P<address>[0-9a-fA-F]+)|\s+?)
             \s(?P<kind>\S)
             \s(?P<symbol>\S+)
             (\t(?P<source>\S+):(?P<line>\d+))?
             """, re.VERBOSE)

     table = []
     for raw in io.TextIOWrapper(nm.stdout):
         found = nm_line.match(raw.rstrip())
         if found is None:
             continue
         hex_address = found.group('address')
         address = int(hex_address, 16) if hex_address else 0
         source = found.group('source') or ''
         # Only non-empty paths are passed through simplify_source.
         cu = simplify_source(source, prefixes) if source else ''
         table.append(
             [address, found.group('kind'), found.group('symbol'), cu])
     return SymbolSourceDF(table,
                           columns=['address', 'kind', 'symbol', 'cu'])


 def add_cu(config: Config, filename: str, symbols: SymbolDF) -> SymbolDF:
     """Return `symbols` left-joined with the `nm` source table.

     Rows are matched on the ('symbol', 'address') pair; symbols with no
     match end up with an empty string in the 'cu' column.
     """
     cu_table = read_sources(config, filename)
     cu_table = cu_table.set_index(['symbol', 'address'])
     merged = pd.merge(symbols, cu_table, on=('symbol', 'address'),
                       how='left')
     merged.fillna({'cu': ''}, inplace=True)
     return merged


 def decode_section_flags(sflags: str) -> int:
     """Convert readelf's section flag letters into a numeric flag mask.

     Each recognized letter found anywhere in `sflags` contributes its
     bit; characters that are not in the table are ignored.
     """
     sh_flags = elftools.elf.constants.SH_FLAGS
     letter_bits = (
         ('W', sh_flags.SHF_WRITE),
         ('A', sh_flags.SHF_ALLOC),
         ('X', sh_flags.SHF_EXECINSTR),
         ('M', sh_flags.SHF_MERGE),
         ('S', sh_flags.SHF_STRINGS),
         ('I', sh_flags.SHF_INFO_LINK),
         ('L', sh_flags.SHF_LINK_ORDER),
         ('O', sh_flags.SHF_OS_NONCONFORMING),
         ('G', sh_flags.SHF_GROUP),
         ('T', sh_flags.SHF_TLS),
         ('C', 0x800),  # SHF_COMPRESSED
         ('E', sh_flags.SHF_EXCLUDE),
         ('y', 0x20000000),  # SHF_ARM_PURECODE
     )
     mask = 0
     for letter, bit in letter_bits:
         if letter in sflags:
             mask |= bit
     return mask


 def decode_segment_flags(sflags: str) -> int:
     """Convert readelf's segment flag letters into a numeric flag mask.

     'R' contributes 4, 'W' contributes 2 and 'E' contributes 1; any other
     character in `sflags` is ignored.
     """
     mask = 0
     for letter, bit in (('R', 4), ('W', 2), ('E', 1)):
         if letter in sflags:
             mask |= bit
     return mask


 def read_segments(text: io.TextIOWrapper) -> SegmentDF:
     """Read a segment table from readelf output.

     Consumes lines from `text` until one is neither a segment row nor an
     interpreter note, and returns the parsed rows with the columns
     'type', 'vaddress', 'paddress', 'size' and 'flags'.  The line that
     ends the table is consumed as well.
     """
     decoder = re.compile(
         r"""^(?P<type>\w+)
             \s+(?P<offset>0x[0-9a-fA-F]+)
             \s+(?P<vaddress>0x[0-9a-fA-F]+)
             \s+(?P<paddress>0x[0-9a-fA-F]+)
             \s+(?P<filesize>0x[0-9a-fA-F]+)
             \s+(?P<size>0x[0-9a-fA-F]+)
             \s(?P<flags>.*)
             \s+0x(?P<align>[0-9a-fA-F]+)
             """, re.VERBOSE)
     columns = ['type', 'vaddress', 'paddress', 'size', 'flags']
     rows = []
     while line := text.readline():
         line = line.strip()
         if not (match := decoder.match(line)):
             # readelf prints `[Requesting program interpreter: ...]` on its
             # own line right after an INTERP entry.  That note is not the
             # end of the table, so skip it instead of dropping every
             # segment that follows.
             if line.startswith('['):
                 continue
             break
         rows.append([
             match.group('type'),
             int(match.group('vaddress'), 16),
             int(match.group('paddress'), 16),
             int(match.group('size'), 16),
             decode_segment_flags(match.group('flags')),
         ])
     return SegmentDF(rows, columns=columns)


 def read_section_to_segment(text: io.TextIOWrapper) -> Dict[str, int]:
     """Parse readelf's section-to-segment listing into a dict.

     Each line holds a decimal segment number followed by zero or more
     section names.  Reading stops at the first blank line or at end of
     input; the result maps every section name to its segment number.
     """
     mapping = {}
     while True:
         fields = text.readline().split()
         if not fields:
             # Blank (or whitespace-only) line, or end of input.
             break
         number, *names = fields
         mapping.update(dict.fromkeys(names, int(number, 10)))
     return mapping


 def read_sections(text: io.TextIOWrapper) -> SectionDF:
     """Parse readelf's section header table.

     Consumes lines from `text` up to and including the first one that is
     not a section row, and returns the parsed rows with the columns
     'section', 'type', 'address', 'size' and 'flags'.
     """
     section_row = re.compile(
         r"""^\[(?P<number>[\s\d]+)\]
             \s+(?P<section>\S*)
             \s+(?P<type>\S+)
             \s+(?P<address>[0-9a-fA-F]+)
             \s+(?P<offset>[0-9a-fA-F]+)
             \s+(?P<size>[0-9a-fA-F]+)
             \s+(?P<es>[0-9a-fA-F]+)
             \s(?P<flags>.*)
             \s(?P<lk>\d+)
             \s+(?P<inf>\d+)
             \s+(?P<align>\d+)
             """, re.VERBOSE)
     table = []
     # iter(readline, '') yields lines until end of input.
     for raw in iter(text.readline, ''):
         fields = section_row.match(raw.strip())
         if fields is None:
             break
         table.append([
             fields['section'],
             fields['type'],
             int(fields['address'], 16),
             int(fields['size'], 16),
             decode_section_flags(fields['flags']),
         ])
     return SectionDF(
         table, columns=['section', 'type', 'address', 'size', 'flags'])


 def read_symbols(text: io.TextIOWrapper) -> SymbolDF:
     """Read a symbol table from readelf output.

     Consumes lines from `text` up to and including the first one that is
     not a symbol row, and returns the parsed rows with the columns
     'symbol', 'address', 'size', 'type', 'bind' and 'shndx'.
     """
     columns = ['symbol', 'address', 'size', 'type', 'bind', 'shndx']
     rows = []
     # By default readelf prints sizes below 100000 in decimal and larger
     # ones in hexadecimal with a `0x` prefix, so accept both forms; a
     # decimal-only pattern would end the table at the first large symbol.
     decoder = re.compile(
         r"""^(?P<number>\d+):
             \s+(?P<address>[0-9a-fA-F]+)
             \s+(?P<size>0[xX][0-9a-fA-F]+|\d+)
             \s+(?P<type>\S+)
             \s+(?P<bind>\S+)
             \s+(?P<vis>\S+)
             \s+(?P<shndx>\S+)
             \s*(?P<symbol>\S*)
             """, re.VERBOSE)
     while line := text.readline():
         if not (match := decoder.match(line.strip())):
             break
         size_text = match.group('size')
         size_base = 16 if size_text[:2].lower() == '0x' else 10
         rows.append([
             match.group('symbol'),
             int(match.group('address'), 16),
             int(size_text, size_base),
             match.group('type'),
             match.group('bind'),
             match.group('shndx'),
         ])
     return SymbolDF(rows, columns=columns)


 def read_file(config: Config, filename: str, method: str = None) -> DFs:
     """Read a binary's memory map using readelf.

     Runs `readelf --wide --segments --sections --symbols` on `filename`
     and parses every table it recognizes in the output.

     Args:
         config: Configuration; `config['args.need_cu']` selects whether a
             'cu' column is added to the symbols via `add_cu`.
         filename: Path of the binary to inspect.
         method: Not used by this function.

     Returns:
         A dict keyed by `SegmentDF.name`, `SectionDF.name` and
         `SymbolDF.name` holding the corresponding frames.
     """
     process = memdf.util.subprocess.run_tool_pipe(config, [
         'readelf', '--wide', '--segments', '--sections', '--symbols', filename
     ])
     if not process or not process.stdout:
         # NOTE(review): this returns a bare SegmentDF although the function
         # is annotated as returning DFs; left unchanged for existing
         # callers -- confirm what they expect on failure.
         return SegmentDF()
     segment_frames: List[SegmentDF] = []
     section_frames: List[SectionDF] = []
     symbol_frames: List[SymbolDF] = []
     section_to_segment = {}
     text = io.TextIOWrapper(process.stdout)
     while line := text.readline():
         line = line.strip()
         # The line after each recognized title is discarded (presumably the
         # column header) before the matching table parser runs.
         if line.startswith('Section Headers'):
             text.readline()
             section_frames.append(read_sections(text))
         elif line.startswith('Program Headers'):
             text.readline()
             segment_frames.append(read_segments(text))
         elif line.startswith('Section to Segment'):
             text.readline()
             section_to_segment.update(read_section_to_segment(text))
         elif line.startswith('Symbol table'):
             text.readline()
             symbol_frames.append(read_symbols(text))

     if segment_frames:
         segments = SegmentDF(pd.concat(segment_frames, ignore_index=True))
     else:
         segments = SegmentDF()
     if section_frames:
         sections = SectionDF(pd.concat(section_frames, ignore_index=True))
     else:
         sections = SectionDF()
     if symbol_frames:
         symbols = SymbolDF(pd.concat(symbol_frames, ignore_index=True))
     else:
         symbols = SymbolDF()

     # Add segment column to sections.
     sections['segment'] = sections['section'].apply(
         lambda s: section_to_segment.get(s, memdf.name.UNKNOWN))

     # Add section name column to symbols.  Symbol rows carry their section
     # as a string (an index, 'UND' or 'ABS'); indexes are resolved through
     # the row index of the concatenated section table.
     section_map = {str(k): v for k, v in sections['section'].items()}
     section_map.update({
         '0': memdf.name.UNDEF,
         'UND': memdf.name.UNDEF,
         'ABS': memdf.name.ABS
     })
     if 'shndx' in symbols.columns:
         symbols['section'] = symbols['shndx'].apply(lambda s: section_map.get(
             s, memdf.name.UNKNOWN))
         # DataFrame.drop returns a new frame unless inplace=True; without
         # it the column was never actually removed.
         symbols.drop(columns='shndx', inplace=True)
     else:
         symbols['section'] = ''

     if config['args.need_cu']:
         symbols = add_cu(config, filename, symbols)

     return {
         SegmentDF.name: segments,
         SectionDF.name: sections,
         SymbolDF.name: symbols
     }
	#
	# Copyright (c) 2021 Project CHIP Authors
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	"""Collect memory information using `readelf` and `nm`."""

	import io
	import re
	from typing import Dict, List

	import elftools.elf.constants # type: ignore
	import memdf.name
	import memdf.util.subprocess
	import pandas as pd # type: ignore
	from memdf.collector.util import simplify_source
	from memdf.df import DFs, SectionDF, SegmentDF, SymbolDF, SymbolSourceDF
	from memdf.util.config import Config, ConfigDescription

	# Configuration entry for the file name of the `nm` executable.
	NM_CONFIG: ConfigDescription = {
	    'tool.nm': dict(
	        help='File name of the nm executable',
	        metavar='FILE',
	        default='nm',
	    ),
	}

	# Configuration entry for the file name of the `readelf` executable.
	READELF_CONFIG: ConfigDescription = {
	    'tool.readelf': dict(
	        help='File name of the readelf executable',
	        metavar='FILE',
	        default='readelf',
	    ),
	}

	# Union of the two tables above, in the same order.
	CONFIG: ConfigDescription = {**NM_CONFIG, **READELF_CONFIG}


	def read_sources(config: Config, filename: str) -> SymbolSourceDF:
	    """Build a symbol-to-compilation-unit table from `nm -l` output.

	    Every output line that parses contributes one row of
	    (address, kind, symbol, cu); lines that do not parse are skipped.
	    A symbol printed without an address is recorded at address 0, and a
	    symbol without a source location gets an empty `cu`.  An empty
	    `SymbolSourceDF` is returned when the tool did not start or has no
	    stdout.
	    """
	    # TBD: figure out how to get this via readelf.
	    prefixes = config.get_re('collect.prefix')
	    process = memdf.util.subprocess.run_tool_pipe(config,
	                                                  ['nm', '-l', filename])
	    if not process or not process.stdout:
	        return SymbolSourceDF()
	    text = io.TextIOWrapper(process.stdout)

	    # The first group is an alternation: a hex address, or the blank
	    # padding printed when a symbol has no address.
	    decoder = re.compile(
	        r"""^((?P<address>[0-9a-fA-F]+)|\s+?)
	            \s(?P<kind>\S)
	            \s(?P<symbol>\S+)
	            (\t(?P<source>\S+):(?P<line>\d+))?
	            """, re.VERBOSE)
	    columns = ['address', 'kind', 'symbol', 'cu']
	    rows = []
	    while line := text.readline():
	        if not (match := decoder.match(line.rstrip())):
	            continue
	        if a := match.group('address'):
	            address = int(a, 16)
	        else:
	            address = 0
	        source = match.group('source') or ''
	        if source:
	            source = simplify_source(source, prefixes)
	        rows.append(
	            [address,
	             match.group('kind'),
	             match.group('symbol'), source])
	    return SymbolSourceDF(rows, columns=columns)


	def add_cu(config: Config, filename: str, symbols: SymbolDF) -> SymbolDF:
	    """Add a 'cu' (compilation unit) column to a symbol table.

	    The `nm` source table is left-joined onto `symbols` using the
	    ('symbol', 'address') pair; symbols with no match get an empty 'cu'.
	    """
	    sources = read_sources(config, filename).set_index(['symbol', 'address'])
	    symbols = pd.merge(symbols, sources, on=('symbol', 'address'), how='left')
	    symbols.fillna({'cu': ''}, inplace=True)
	    return symbols


	def decode_section_flags(sflags: str) -> int:
	    """Map readelf's representation of section flags to ELF flag values.

	    Each recognized letter found anywhere in `sflags` contributes its
	    bit; characters that are not in the table are ignored.
	    """
	    d = {
	        'W': elftools.elf.constants.SH_FLAGS.SHF_WRITE,
	        'A': elftools.elf.constants.SH_FLAGS.SHF_ALLOC,
	        'X': elftools.elf.constants.SH_FLAGS.SHF_EXECINSTR,
	        'M': elftools.elf.constants.SH_FLAGS.SHF_MERGE,
	        'S': elftools.elf.constants.SH_FLAGS.SHF_STRINGS,
	        'I': elftools.elf.constants.SH_FLAGS.SHF_INFO_LINK,
	        'L': elftools.elf.constants.SH_FLAGS.SHF_LINK_ORDER,
	        'O': elftools.elf.constants.SH_FLAGS.SHF_OS_NONCONFORMING,
	        'G': elftools.elf.constants.SH_FLAGS.SHF_GROUP,
	        'T': elftools.elf.constants.SH_FLAGS.SHF_TLS,
	        'C': 0x800,  # SHF_COMPRESSED
	        'E': elftools.elf.constants.SH_FLAGS.SHF_EXCLUDE,
	        'y': 0x20000000,  # SHF_ARM_PURECODE
	    }
	    flags = 0
	    for k, v in d.items():
	        if k in sflags:
	            flags |= v
	    return flags


	def decode_segment_flags(sflags: str) -> int:
	    """Map readelf's representation of segment flags to ELF flag values.

	    'R' contributes 4, 'W' contributes 2 and 'E' contributes 1; any other
	    character in `sflags` is ignored.
	    """
	    return ((int('R' in sflags) << 2) | (int('W' in sflags) << 1) |
	            (int('E' in sflags)))


	def read_segments(text: io.TextIOWrapper) -> SegmentDF:
	    """Read a segment table from readelf output.

	    Consumes lines from `text` until one is neither a segment row nor an
	    interpreter note, and returns the parsed rows with the columns
	    'type', 'vaddress', 'paddress', 'size' and 'flags'.  The line that
	    ends the table is consumed as well.
	    """
	    decoder = re.compile(
	        r"""^(?P<type>\w+)
	            \s+(?P<offset>0x[0-9a-fA-F]+)
	            \s+(?P<vaddress>0x[0-9a-fA-F]+)
	            \s+(?P<paddress>0x[0-9a-fA-F]+)
	            \s+(?P<filesize>0x[0-9a-fA-F]+)
	            \s+(?P<size>0x[0-9a-fA-F]+)
	            \s(?P<flags>.*)
	            \s+0x(?P<align>[0-9a-fA-F]+)
	            """, re.VERBOSE)
	    columns = ['type', 'vaddress', 'paddress', 'size', 'flags']
	    rows = []
	    while line := text.readline():
	        line = line.strip()
	        if not (match := decoder.match(line)):
	            # readelf prints `[Requesting program interpreter: ...]` on its
	            # own line right after an INTERP entry.  That note is not the
	            # end of the table, so skip it instead of dropping every
	            # segment that follows.
	            if line.startswith('['):
	                continue
	            break
	        rows.append([
	            match.group('type'),
	            int(match.group('vaddress'), 16),
	            int(match.group('paddress'), 16),
	            int(match.group('size'), 16),
	            decode_segment_flags(match.group('flags')),
	        ])
	    return SegmentDF(rows, columns=columns)


	def read_section_to_segment(text: io.TextIOWrapper) -> Dict[str, int]:
	    """Read a section-to-segment map from readelf output.

	    Each line holds a decimal segment number followed by zero or more
	    section names.  Reading stops at the first blank line or at end of
	    input; the result maps every section name to its segment number.
	    """
	    section_to_segment = {}
	    while line := text.readline().strip():
	        s = line.split()
	        segment = int(s[0], 10)
	        for section in s[1:]:
	            section_to_segment[section] = segment
	    return section_to_segment


	def read_sections(text: io.TextIOWrapper) -> SectionDF:
	    """Read a section table from readelf output.

	    Consumes lines from `text` up to and including the first one that is
	    not a section row, and returns the parsed rows with the columns
	    'section', 'type', 'address', 'size' and 'flags'.
	    """
	    columns = ['section', 'type', 'address', 'size', 'flags']
	    rows = []
	    decoder = re.compile(
	        r"""^\[(?P<number>[\s\d]+)\]
	            \s+(?P<section>\S*)
	            \s+(?P<type>\S+)
	            \s+(?P<address>[0-9a-fA-F]+)
	            \s+(?P<offset>[0-9a-fA-F]+)
	            \s+(?P<size>[0-9a-fA-F]+)
	            \s+(?P<es>[0-9a-fA-F]+)
	            \s(?P<flags>.*)
	            \s(?P<lk>\d+)
	            \s+(?P<inf>\d+)
	            \s+(?P<align>\d+)
	            """, re.VERBOSE)
	    while line := text.readline():
	        if not (match := decoder.match(line.strip())):
	            break
	        rows.append([
	            match.group('section'),
	            match.group('type'),
	            int(match.group('address'), 16),
	            int(match.group('size'), 16),
	            decode_section_flags(match.group('flags')),
	        ])
	    return SectionDF(rows, columns=columns)


	def read_symbols(text: io.TextIOWrapper) -> SymbolDF:
	    """Read a symbol table from readelf output.

	    Consumes lines from `text` up to and including the first one that is
	    not a symbol row, and returns the parsed rows with the columns
	    'symbol', 'address', 'size', 'type', 'bind' and 'shndx'.
	    """
	    columns = ['symbol', 'address', 'size', 'type', 'bind', 'shndx']
	    rows = []
	    # By default readelf prints sizes below 100000 in decimal and larger
	    # ones in hexadecimal with a `0x` prefix, so accept both forms.  The
	    # name is optional (`\s*` / `\S*`) because a row may have no name.
	    decoder = re.compile(
	        r"""^(?P<number>\d+):
	            \s+(?P<address>[0-9a-fA-F]+)
	            \s+(?P<size>0[xX][0-9a-fA-F]+|\d+)
	            \s+(?P<type>\S+)
	            \s+(?P<bind>\S+)
	            \s+(?P<vis>\S+)
	            \s+(?P<shndx>\S+)
	            \s*(?P<symbol>\S*)
	            """, re.VERBOSE)
	    while line := text.readline():
	        if not (match := decoder.match(line.strip())):
	            break
	        size_text = match.group('size')
	        size_base = 16 if size_text[:2].lower() == '0x' else 10
	        rows.append([
	            match.group('symbol'),
	            int(match.group('address'), 16),
	            int(size_text, size_base),
	            match.group('type'),
	            match.group('bind'),
	            match.group('shndx'),
	        ])
	    return SymbolDF(rows, columns=columns)


	def read_file(config: Config, filename: str, method: str = None) -> DFs:
	    """Read a binary's memory map using readelf.

	    Runs `readelf --wide --segments --sections --symbols` on `filename`
	    and parses every table it recognizes in the output.

	    Args:
	        config: Configuration; `config['args.need_cu']` selects whether a
	            'cu' column is added to the symbols via `add_cu`.
	        filename: Path of the binary to inspect.
	        method: Not used by this function.

	    Returns:
	        A dict keyed by `SegmentDF.name`, `SectionDF.name` and
	        `SymbolDF.name` holding the corresponding frames.
	    """
	    process = memdf.util.subprocess.run_tool_pipe(config, [
	        'readelf', '--wide', '--segments', '--sections', '--symbols', filename
	    ])
	    if not process or not process.stdout:
	        # NOTE(review): this returns a bare SegmentDF although the function
	        # is annotated as returning DFs; left unchanged for existing
	        # callers -- confirm what they expect on failure.
	        return SegmentDF()
	    segment_frames: List[SegmentDF] = []
	    section_frames: List[SectionDF] = []
	    symbol_frames: List[SymbolDF] = []
	    section_to_segment = {}
	    text = io.TextIOWrapper(process.stdout)
	    while line := text.readline():
	        line = line.strip()
	        # The line after each recognized title is discarded (presumably the
	        # column header) before the matching table parser runs.
	        if line.startswith('Section Headers'):
	            text.readline()
	            section_frames.append(read_sections(text))
	        elif line.startswith('Program Headers'):
	            text.readline()
	            segment_frames.append(read_segments(text))
	        elif line.startswith('Section to Segment'):
	            text.readline()
	            section_to_segment.update(read_section_to_segment(text))
	        elif line.startswith('Symbol table'):
	            text.readline()
	            symbol_frames.append(read_symbols(text))

	    if segment_frames:
	        segments = SegmentDF(pd.concat(segment_frames, ignore_index=True))
	    else:
	        segments = SegmentDF()
	    if section_frames:
	        sections = SectionDF(pd.concat(section_frames, ignore_index=True))
	    else:
	        sections = SectionDF()
	    if symbol_frames:
	        symbols = SymbolDF(pd.concat(symbol_frames, ignore_index=True))
	    else:
	        symbols = SymbolDF()

	    # Add segment column to sections.
	    sections['segment'] = sections['section'].apply(
	        lambda s: section_to_segment.get(s, memdf.name.UNKNOWN))

	    # Add section name column to symbols.  Symbol rows carry their section
	    # as a string (an index, 'UND' or 'ABS'); indexes are resolved through
	    # the row index of the concatenated section table.
	    section_map = {str(k): v for k, v in sections['section'].items()}
	    section_map.update({
	        '0': memdf.name.UNDEF,
	        'UND': memdf.name.UNDEF,
	        'ABS': memdf.name.ABS
	    })
	    if 'shndx' in symbols.columns:
	        symbols['section'] = symbols['shndx'].apply(lambda s: section_map.get(
	            s, memdf.name.UNKNOWN))
	        # DataFrame.drop returns a new frame unless inplace=True; without
	        # it the column was never actually removed.
	        symbols.drop(columns='shndx', inplace=True)
	    else:
	        symbols['section'] = ''

	    if config['args.need_cu']:
	        symbols = add_cu(config, filename, symbols)

	    return {
	        SegmentDF.name: segments,
	        SectionDF.name: sections,
	        SymbolDF.name: symbols
	    }