scripts/tools/memory/memdf/collect.py - third_party/github/project-chip/connectedhomeip - Git at Google

 #
 # Copyright (c) 2021 Project CHIP Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 """Collect information from various sources into Memory Map DataFrames."""

 import bisect

 from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple

 from elftools.elf.constants import SH_FLAGS  # type: ignore
 import pandas as pd  # type: ignore

 import memdf.collector.bloaty
 import memdf.collector.csv
 import memdf.collector.elftools
 import memdf.collector.readelf
 import memdf.collector.su
 import memdf.name
 import memdf.select
 import memdf.util.config

 from memdf import (Config, ConfigDescription, DF, DFs, SymbolDF, SectionDF,
                    ExtentDF)
 from memdf.collector.util import simplify_source

 # Description of the 'collect.prefix' option. It is a separate mapping that
 # is also merged into CONFIG below. The option uses the 'append' action, so
 # it may be given more than once; the values accumulate in a list.
 PREFIX_CONFIG: ConfigDescription = {
     'collect.prefix': {
         'help': 'Strip PATH from the beginning of source file names',
         'metavar': 'PATH',
         'default': [],
         'argparse': {
             'alias': ['--prefix', '--strip-prefix'],
             'action': 'append',
         }
     },
 }

 # Configuration description used by parse_args(). It defines the 'input' and
 # 'tool' option groups, merges in the option descriptions of the individual
 # collectors, and adds the options declared in this module. Later entries
 # override earlier ones with the same key, so the merge order is significant.
 CONFIG: ConfigDescription = {
     Config.group_def('input'): {
         'title': 'input options',
     },
     Config.group_def('tool'): {
         'title': 'external tool options',
     },
     # Presumably places the 'collect.*' options in the 'input' group;
     # confirm against Config.group_map().
     Config.group_map('collect'): {
         'group': 'input'
     },
     **memdf.collector.bloaty.CONFIG,
     **memdf.collector.csv.CONFIG,
     **memdf.collector.elftools.CONFIG,
     **memdf.collector.readelf.CONFIG,
     # Selects the reader that collect_files() looks up in FILE_READERS.
     # NOTE(review): collect_files() falls back to 'csv', not to this
     # 'elftools' default, when the key is absent -- confirm that is intended.
     'collect.method': {
         'help':
             'Method of input processing: one of'
             ' elftools, readelf, bloaty, csv, tsv, su.',
         'metavar': 'METHOD',
         'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
         'default': 'elftools',
         'argparse': {
             'alias': ['-f'],
         },
     },
     **PREFIX_CONFIG,
 }

 # Names of the NOTYPE marker symbols that start ARM mode tracking in
 # postprocess_symbols().
 ARM_SPECIAL_SYMBOLS = frozenset(["$a", "$t", "$t.x", "$d", "$d.realdata"])


 def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
     """Fold FILE and ARM mode marker symbols into per-symbol columns.

     The table is scanned in row order, and only if it has a 'type' column.

     Each FILE symbol sets the file name attributed to the rows that follow
     it; when 'collect.prefix-file' is set the name is first simplified with
     the 'collect.prefix' patterns. If any FILE symbol was seen, a 'file'
     column is added and the FILE rows are dropped.

     A NOTYPE symbol whose name starts with '$' is an ARM mode marker if it
     is listed in ARM_SPECIAL_SYMBOLS or if a marker has already been seen.
     If any marker was seen, an 'arm' column is added and every row whose
     name equals a seen marker is dropped.

     The new columns are assigned on the frame passed in before any rows are
     filtered out; the filtered table is returned.
     """
     prefixes = (config.get_re('collect.prefix')
                 if config['collect.prefix-file'] else None)

     file_column = []
     arm_column = []
     marker_names = set()
     active_file = ''
     active_arm = ''
     saw_file = False

     if 'type' in symbols.columns:
         for row in symbols.itertuples():
             kind = row.type
             if kind == 'FILE':
                 saw_file = True
                 active_file = row.symbol
                 if prefixes:
                     active_file = simplify_source(active_file, prefixes)
             elif kind == 'NOTYPE' and row.symbol.startswith('$'):
                 if active_arm or row.symbol in ARM_SPECIAL_SYMBOLS:
                     active_arm = row.symbol
                     marker_names.add(active_arm)
             # Every row, markers included, records the state in effect.
             file_column.append(active_file)
             arm_column.append(active_arm)

     # A marker name is never empty, so a non-empty mode means one was seen.
     saw_arm = bool(active_arm)

     # Both columns must be attached while the table still has all its rows.
     if saw_file:
         symbols['file'] = file_column
     if saw_arm:
         symbols['arm'] = arm_column

     if saw_file:
         symbols = symbols[symbols['type'] != 'FILE']
     if saw_arm:
         symbols = symbols[~symbols.symbol.isin(marker_names)]
     return symbols


 def postprocess_file(config: Config, dfs: DFs) -> None:
     """Run per-source postprocessing on the tables read from one input.

     Only the symbol table is affected: when present, it is replaced in
     *dfs* by the result of postprocess_symbols().
     """
     key = SymbolDF.name
     if key not in dfs:
         return
     dfs[key] = postprocess_symbols(config, dfs[key])


 def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
     """Account for space not used by any symbol, or by multiple symbols.

     Only sections that are named in the 'region.sections' configuration or
     carry the SHF_ALLOC flag are examined. Their symbols are visited in
     address order and a synthetic row is produced for each hole:

     - 'unused': space before the first symbol of a section, space following
       a symbol named in 'symbol.free.start', or space preceding a symbol
       named in 'symbol.free.end';
     - 'gap': any other space not covered by a symbol, including the space
       between the last symbol of a section and the end of that section;
     - 'overlap': a symbol starting before the end of the previous one; the
       row starts at the previous end and has a negative size.

     Args:
         config: Read for 'symbol.free.start', 'symbol.free.end' and
             'region.sections'.
         symbols: Symbol table. The 'symbol', 'address', 'size', 'section',
             'file' and 'type' columns are read; 'cu' and 'input' are
             carried over to the synthetic rows when present.
         sections: Section table. The 'section', 'address', 'size' and
             'flags' columns are read.

     Returns:
         A mapping holding an extent table for each of 'gap', 'unused' and
         'overlap', and under SymbolDF.name the symbol table extended with
         the synthetic rows and sorted by address.
     """

     # These symbols mark the start or end of unused space.
     start_unused = frozenset(config.get('symbol.free.start', []))
     end_unused = frozenset(config.get('symbol.free.end', []))

     extent_columns = ['address', 'size', 'section', 'file']
     need_cu = 'cu' in symbols.columns
     if need_cu:
         extent_columns.append('cu')
     need_input = 'input' in symbols.columns
     if need_input:
         extent_columns.append('input')
     columns = ['symbol', *extent_columns, 'type', 'bind']

     def filler(name, address, size, previous, current) -> List:
         """Build one synthetic row whose fields are ordered like `columns`."""
         # Attribution comes from the preceding symbol when there is one,
         # otherwise from the following symbol.
         donor = previous if previous else current
         row = [
             name,  # symbol
             address,  # address
             size,  # size
             donor.section if donor else memdf.name.UNDEF,  # section
             donor.file if donor else '',  # file
         ]
         if need_cu:
             row.append(donor.cu if donor else '')
         if need_input:
             row.append(donor.input if donor else '')
         row.append('NOTYPE')  # type
         row.append('LOCAL')  # bind
         return row

     def fill_gap(previous, current, from_address,
                  to_address) -> Tuple[str, List]:
         """Add a row for an unaccounted gap or unused space."""
         size = to_address - from_address
         if (previous is None or previous.symbol in start_unused
                 or current.symbol in end_unused):
             use = 'unused'
             name = memdf.name.unused(from_address, size)
         else:
             use = 'gap'
             name = memdf.name.gap(from_address, size)
         return (use, filler(name, from_address, size, previous, current))

     def fill_overlap(previous, current, from_address,
                      to_address) -> Tuple[str, List]:
         """Add a row for overlap."""
         # to_address is below from_address here, so size is negative; the
         # name is built from its magnitude.
         size = to_address - from_address
         return ('overlap',
                 filler(memdf.name.overlap(from_address, -size), from_address,
                        size, previous, current))

     # Find the address range for sections that are configured or allocated.
     config_sections = set()
     for _, s in config.get('region.sections', {}).items():
         config_sections |= set(s)
     section_to_range = {}
     start_to_section = {}
     # The leading 0 guarantees that the bisect lookup below always finds an
     # entry, even for addresses lower than every section start.
     section_starts = [0]
     for s in sections.itertuples():
         if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
             section_to_range[s.section] = range(s.address, s.address + s.size)
             start_to_section[s.address] = s.section
             section_starts.append(s.address)
     section_starts.sort()

     new_symbols: Dict[str, List[list]] = {
         'gap': [],
         'unused': [],
         'overlap': []
     }

     def fill_tail(last_symbol, from_address, extent) -> None:
         """Record the space between a section's last symbol and its end."""
         section_end = extent[-1] + 1
         if from_address < section_end:
             use, row = fill_gap(last_symbol, last_symbol, from_address,
                                 section_end)
             new_symbols[use].append(row)

     # An empty range is falsy, so zero-sized sections are never filled.
     section_range = None
     previous_symbol = None
     current_address = 0
     iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
                                    & (symbols.type != 'FILE')
                                    & symbols.section.isin(section_to_range)]
     iterable_symbols = iterable_symbols.sort_values(by='address')

     for symbol in iterable_symbols.itertuples():
         if not previous_symbol or symbol.section != previous_symbol.section:
             # We sometimes see symbols that have the value of their section end
             # address (so they are not actually within the section) and have
             # the same address as a symbol in the next section.
             symbol_address_section = start_to_section.get(section_starts[
                 bisect.bisect_right(section_starts, symbol.address) - 1])
             if symbol_address_section != symbol.section:
                 continue
             # Starting or switching sections.
             if previous_symbol and section_range:
                 # previous_symbol is the last in its section.
                 fill_tail(previous_symbol, current_address, section_range)
             # Start of section.
             previous_symbol = None
             section_range = section_to_range.get(symbol.section)
             if section_range:
                 current_address = section_range[0]
         if section_range:
             if current_address < symbol.address:
                 use, row = fill_gap(previous_symbol, symbol, current_address,
                                     symbol.address)
                 new_symbols[use].append(row)
             elif current_address > symbol.address:
                 use, row = fill_overlap(previous_symbol, symbol,
                                         current_address, symbol.address)
                 new_symbols[use].append(row)
         current_address = symbol.address + symbol.size
         previous_symbol = symbol

     # The loop closes a section only when the next one starts, so the final
     # section has to be closed here or its trailing space goes unreported.
     if previous_symbol and section_range:
         fill_tail(previous_symbol, current_address, section_range)

     dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
     symbols = pd.concat([symbols, *dfs.values()]).fillna('')
     symbols.sort_values(by='address', inplace=True)
     for k in dfs:
         # The standalone hole tables keep only the extent columns.
         dfs[k] = ExtentDF(dfs[k][extent_columns])
         dfs[k].attrs['name'] = k
     dfs[SymbolDF.name] = SymbolDF(symbols)
     return dfs


 def postprocess_collected(config: Config, dfs: DFs) -> None:
     """Postprocess the combined tables once every source has been read.

     The tables in *dfs* are updated in place, in three stages, followed by
     tagging of columns for later formatting.
     """
     table_names = (SymbolDF.name, SectionDF.name)

     # Stage 1: prune by the collected-column options. Doing this ahead of
     # fill_holes() means the space of pruned symbols is still accounted for
     # and pruned sections cost no further work.
     for name in table_names:
         if name not in dfs:
             continue
         dfs[name] = memdf.select.select_configured(
             config, dfs[name], memdf.select.COLLECTED_CHOICES)

     # Stage 2: account for space used by no symbol or by several symbols.
     if (all(name in dfs for name in table_names)
             and config.get('args.fill_holes', True)):
         holes = fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name])
         dfs.update(holes)

     # Stage 3: add synthetic columns (e.g. 'region') and prune by them. This
     # follows fill_holes() so the gap symbols receive the values as well.
     for name in table_names:
         if name not in dfs:
             continue
         for column in memdf.select.SYNTHETIC_CHOICES:
             dfs[name] = memdf.select.synthesize_column(
                 config, dfs[name], column)
             dfs[name] = memdf.select.select_configured_column(
                 config, dfs[name], column)

     # Record, per table, which columns hold symbol names and addresses.
     for df in dfs.values():
         if demangle := {c for c in df.columns if c.endswith('symbol')}:
             df.attrs['demangle'] = demangle
         if hexify := {c for c in df.columns if c.endswith('address')}:
             df.attrs['hexify'] = hexify


 # Signature shared by the readers below; collect_files() calls them as
 # reader(config, filename, method).
 FileReader = Callable[[Config, str, str], DFs]

 # Reader used for each 'collect.method' value. 'csv' and 'tsv' share one
 # reader, and 'su' maps to a read_dir function rather than a read_file one.
 FILE_READERS: Dict[str, FileReader] = {
     'bloaty': memdf.collector.bloaty.read_file,
     'elftools': memdf.collector.elftools.read_file,
     'readelf': memdf.collector.readelf.read_file,
     'csv': memdf.collector.csv.read_file,
     'tsv': memdf.collector.csv.read_file,
     'su': memdf.collector.su.read_dir,
 }


 def collect_files(config: Config,
                   files: Optional[List[str]] = None,
                   method: Optional[str] = None) -> DFs:
     """Read a filtered memory map from a set of files.

     Args:
         config: Configuration passed to the reader and the postprocessors.
         files: Inputs to read; when empty or None the 'args.inputs'
             configuration value is used instead.
         method: Key into FILE_READERS; when None the 'collect.method'
             configuration value is used, falling back to 'csv'.

     Returns:
         One table per table name, concatenated across all inputs and run
         through postprocess_collected().
     """
     filenames = files or config.get('args.inputs', [])
     if method is None:
         method = config.get('collect.method', 'csv')

     # Group the per-file tables by table name.
     collected: Dict[str, List[DF]] = {}
     for filename in filenames:
         per_file: DFs = FILE_READERS[method](config, filename, method)
         postprocess_file(config, per_file)
         for key, frame in per_file.items():
             collected.setdefault(key, []).append(frame)

     merged = {
         key: pd.concat(parts, ignore_index=True)
         for key, parts in collected.items()
     }
     postprocess_collected(config, merged)
     return merged


 def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
     """Common argument parsing for collection tools.

     The configuration description is assembled from the generic memdf
     options, this module's CONFIG and the caller's *config_desc*, in that
     order, so the caller's entries win on key clashes. One or more
     positional 'inputs' (shown as FILE) are required.

     Returns:
         The result of parsing *argv* with that configuration.
     """
     blank = Config()
     description = {
         **memdf.util.config.CONFIG,
         **CONFIG,
         **config_desc,
     }
     config = blank.init(description)
     config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
     return config.parse(argv)
	#
	# Copyright (c) 2021 Project CHIP Authors
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	"""Collect information from various sources into Memory Map DataFrames."""

	import bisect

	from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple

	from elftools.elf.constants import SH_FLAGS # type: ignore
	import pandas as pd # type: ignore

	import memdf.collector.bloaty
	import memdf.collector.csv
	import memdf.collector.elftools
	import memdf.collector.readelf
	import memdf.collector.su
	import memdf.name
	import memdf.select
	import memdf.util.config

	from memdf import (Config, ConfigDescription, DF, DFs, SymbolDF, SectionDF,
	ExtentDF)
	from memdf.collector.util import simplify_source

	# Description of the 'collect.prefix' option. It is a separate mapping that
	# is also merged into CONFIG below. The option uses the 'append' action, so
	# it may be given more than once; the values accumulate in a list.
	PREFIX_CONFIG: ConfigDescription = {
	    'collect.prefix': {
	        'help': 'Strip PATH from the beginning of source file names',
	        'metavar': 'PATH',
	        'default': [],
	        'argparse': {
	            'alias': ['--prefix', '--strip-prefix'],
	            'action': 'append',
	        }
	    },
	}

	# Configuration description used by parse_args(). It defines the 'input' and
	# 'tool' option groups, merges in the option descriptions of the individual
	# collectors, and adds the options declared in this module. Later entries
	# override earlier ones with the same key, so the merge order is significant.
	CONFIG: ConfigDescription = {
	    Config.group_def('input'): {
	        'title': 'input options',
	    },
	    Config.group_def('tool'): {
	        'title': 'external tool options',
	    },
	    # Presumably places the 'collect.*' options in the 'input' group;
	    # confirm against Config.group_map().
	    Config.group_map('collect'): {
	        'group': 'input'
	    },
	    **memdf.collector.bloaty.CONFIG,
	    **memdf.collector.csv.CONFIG,
	    **memdf.collector.elftools.CONFIG,
	    **memdf.collector.readelf.CONFIG,
	    # Selects the reader that collect_files() looks up in FILE_READERS.
	    'collect.method': {
	        'help':
	            'Method of input processing: one of'
	            ' elftools, readelf, bloaty, csv, tsv, su.',
	        'metavar': 'METHOD',
	        'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
	        'default': 'elftools',
	        'argparse': {
	            'alias': ['-f'],
	        },
	    },
	    **PREFIX_CONFIG,
	}

	# Names of the NOTYPE marker symbols that start ARM mode tracking in
	# postprocess_symbols().
	ARM_SPECIAL_SYMBOLS = frozenset(["$a", "$t", "$t.x", "$d", "$d.realdata"])


	def postprocess_symbols(config: Config, symbols: SymbolDF) -> SymbolDF:
	    """Fold FILE and ARM mode marker symbols into per-symbol columns.

	    The table is scanned in row order, and only if it has a 'type' column.

	    Each FILE symbol sets the file name attributed to the rows that follow
	    it; when 'collect.prefix-file' is set the name is first simplified with
	    the 'collect.prefix' patterns. If any FILE symbol was seen, a 'file'
	    column is added and the FILE rows are dropped.

	    A NOTYPE symbol whose name starts with '$' is an ARM mode marker if it
	    is listed in ARM_SPECIAL_SYMBOLS or if a marker has already been seen.
	    If any marker was seen, an 'arm' column is added and every row whose
	    name equals a seen marker is dropped.

	    The new columns are assigned on the frame passed in before any rows are
	    filtered out; the filtered table is returned.
	    """
	    prefixes = (config.get_re('collect.prefix')
	                if config['collect.prefix-file'] else None)

	    file_column = []
	    arm_column = []
	    marker_names = set()
	    active_file = ''
	    active_arm = ''
	    saw_file = False

	    if 'type' in symbols.columns:
	        for row in symbols.itertuples():
	            kind = row.type
	            if kind == 'FILE':
	                saw_file = True
	                active_file = row.symbol
	                if prefixes:
	                    active_file = simplify_source(active_file, prefixes)
	            elif kind == 'NOTYPE' and row.symbol.startswith('$'):
	                if active_arm or row.symbol in ARM_SPECIAL_SYMBOLS:
	                    active_arm = row.symbol
	                    marker_names.add(active_arm)
	            # Every row, markers included, records the state in effect.
	            file_column.append(active_file)
	            arm_column.append(active_arm)

	    # A marker name is never empty, so a non-empty mode means one was seen.
	    saw_arm = bool(active_arm)

	    # Both columns must be attached while the table still has all its rows.
	    if saw_file:
	        symbols['file'] = file_column
	    if saw_arm:
	        symbols['arm'] = arm_column

	    if saw_file:
	        symbols = symbols[symbols['type'] != 'FILE']
	    if saw_arm:
	        symbols = symbols[~symbols.symbol.isin(marker_names)]
	    return symbols


	def postprocess_file(config: Config, dfs: DFs) -> None:
	    """Run per-source postprocessing on the tables read from one input.

	    Only the symbol table is affected: when present, it is replaced in
	    *dfs* by the result of postprocess_symbols().
	    """
	    key = SymbolDF.name
	    if key not in dfs:
	        return
	    dfs[key] = postprocess_symbols(config, dfs[key])


	def fill_holes(config: Config, symbols: SymbolDF, sections: SectionDF) -> DFs:
	    """Account for space not used by any symbol, or by multiple symbols.

	    Only sections that are named in the 'region.sections' configuration or
	    carry the SHF_ALLOC flag are examined. Their symbols are visited in
	    address order and a synthetic row is produced for each hole:

	    - 'unused': space before the first symbol of a section, space following
	      a symbol named in 'symbol.free.start', or space preceding a symbol
	      named in 'symbol.free.end';
	    - 'gap': any other space not covered by a symbol, including the space
	      between the last symbol of a section and the end of that section;
	    - 'overlap': a symbol starting before the end of the previous one; the
	      row starts at the previous end and has a negative size.

	    Args:
	        config: Read for 'symbol.free.start', 'symbol.free.end' and
	            'region.sections'.
	        symbols: Symbol table. The 'symbol', 'address', 'size', 'section',
	            'file' and 'type' columns are read; 'cu' and 'input' are
	            carried over to the synthetic rows when present.
	        sections: Section table. The 'section', 'address', 'size' and
	            'flags' columns are read.

	    Returns:
	        A mapping holding an extent table for each of 'gap', 'unused' and
	        'overlap', and under SymbolDF.name the symbol table extended with
	        the synthetic rows and sorted by address.
	    """

	    # These symbols mark the start or end of unused space.
	    start_unused = frozenset(config.get('symbol.free.start', []))
	    end_unused = frozenset(config.get('symbol.free.end', []))

	    extent_columns = ['address', 'size', 'section', 'file']
	    need_cu = 'cu' in symbols.columns
	    if need_cu:
	        extent_columns.append('cu')
	    need_input = 'input' in symbols.columns
	    if need_input:
	        extent_columns.append('input')
	    columns = ['symbol', *extent_columns, 'type', 'bind']

	    def filler(name, address, size, previous, current) -> List:
	        """Build one synthetic row whose fields are ordered like `columns`."""
	        # Attribution comes from the preceding symbol when there is one,
	        # otherwise from the following symbol.
	        donor = previous if previous else current
	        row = [
	            name,  # symbol
	            address,  # address
	            size,  # size
	            donor.section if donor else memdf.name.UNDEF,  # section
	            donor.file if donor else '',  # file
	        ]
	        if need_cu:
	            row.append(donor.cu if donor else '')
	        if need_input:
	            row.append(donor.input if donor else '')
	        row.append('NOTYPE')  # type
	        row.append('LOCAL')  # bind
	        return row

	    def fill_gap(previous, current, from_address,
	                 to_address) -> Tuple[str, List]:
	        """Add a row for an unaccounted gap or unused space."""
	        size = to_address - from_address
	        if (previous is None or previous.symbol in start_unused
	                or current.symbol in end_unused):
	            use = 'unused'
	            name = memdf.name.unused(from_address, size)
	        else:
	            use = 'gap'
	            name = memdf.name.gap(from_address, size)
	        return (use, filler(name, from_address, size, previous, current))

	    def fill_overlap(previous, current, from_address,
	                     to_address) -> Tuple[str, List]:
	        """Add a row for overlap."""
	        # to_address is below from_address here, so size is negative; the
	        # name is built from its magnitude.
	        size = to_address - from_address
	        return ('overlap',
	                filler(memdf.name.overlap(from_address, -size), from_address,
	                       size, previous, current))

	    # Find the address range for sections that are configured or allocated.
	    config_sections = set()
	    for _, s in config.get('region.sections', {}).items():
	        config_sections |= set(s)
	    section_to_range = {}
	    start_to_section = {}
	    # The leading 0 guarantees that the bisect lookup below always finds an
	    # entry, even for addresses lower than every section start.
	    section_starts = [0]
	    for s in sections.itertuples():
	        if ((s.section in config_sections) or (s.flags & SH_FLAGS.SHF_ALLOC)):
	            section_to_range[s.section] = range(s.address, s.address + s.size)
	            start_to_section[s.address] = s.section
	            section_starts.append(s.address)
	    section_starts.sort()

	    new_symbols: Dict[str, List[list]] = {
	        'gap': [],
	        'unused': [],
	        'overlap': []
	    }

	    def fill_tail(last_symbol, from_address, extent) -> None:
	        """Record the space between a section's last symbol and its end."""
	        section_end = extent[-1] + 1
	        if from_address < section_end:
	            use, row = fill_gap(last_symbol, last_symbol, from_address,
	                                section_end)
	            new_symbols[use].append(row)

	    # An empty range is falsy, so zero-sized sections are never filled.
	    section_range = None
	    previous_symbol = None
	    current_address = 0
	    iterable_symbols = symbols.loc[(symbols.type != 'SECTION')
	                                   & (symbols.type != 'FILE')
	                                   & symbols.section.isin(section_to_range)]
	    iterable_symbols = iterable_symbols.sort_values(by='address')

	    for symbol in iterable_symbols.itertuples():
	        if not previous_symbol or symbol.section != previous_symbol.section:
	            # We sometimes see symbols that have the value of their section end
	            # address (so they are not actually within the section) and have
	            # the same address as a symbol in the next section.
	            symbol_address_section = start_to_section.get(section_starts[
	                bisect.bisect_right(section_starts, symbol.address) - 1])
	            if symbol_address_section != symbol.section:
	                continue
	            # Starting or switching sections.
	            if previous_symbol and section_range:
	                # previous_symbol is the last in its section.
	                fill_tail(previous_symbol, current_address, section_range)
	            # Start of section.
	            previous_symbol = None
	            section_range = section_to_range.get(symbol.section)
	            if section_range:
	                current_address = section_range[0]
	        if section_range:
	            if current_address < symbol.address:
	                use, row = fill_gap(previous_symbol, symbol, current_address,
	                                    symbol.address)
	                new_symbols[use].append(row)
	            elif current_address > symbol.address:
	                use, row = fill_overlap(previous_symbol, symbol,
	                                        current_address, symbol.address)
	                new_symbols[use].append(row)
	        current_address = symbol.address + symbol.size
	        previous_symbol = symbol

	    # The loop closes a section only when the next one starts, so the final
	    # section has to be closed here or its trailing space goes unreported.
	    if previous_symbol and section_range:
	        fill_tail(previous_symbol, current_address, section_range)

	    dfs = {k: SymbolDF(new_symbols[k], columns=columns) for k in new_symbols}
	    symbols = pd.concat([symbols, *dfs.values()]).fillna('')
	    symbols.sort_values(by='address', inplace=True)
	    for k in dfs:
	        # The standalone hole tables keep only the extent columns.
	        dfs[k] = ExtentDF(dfs[k][extent_columns])
	        dfs[k].attrs['name'] = k
	    dfs[SymbolDF.name] = SymbolDF(symbols)
	    return dfs


	def postprocess_collected(config: Config, dfs: DFs) -> None:
	    """Postprocess the combined tables once every source has been read.

	    The tables in *dfs* are updated in place, in three stages, followed by
	    tagging of columns for later formatting.
	    """
	    table_names = (SymbolDF.name, SectionDF.name)

	    # Stage 1: prune by the collected-column options. Doing this ahead of
	    # fill_holes() means the space of pruned symbols is still accounted for
	    # and pruned sections cost no further work.
	    for name in table_names:
	        if name not in dfs:
	            continue
	        dfs[name] = memdf.select.select_configured(
	            config, dfs[name], memdf.select.COLLECTED_CHOICES)

	    # Stage 2: account for space used by no symbol or by several symbols.
	    if (all(name in dfs for name in table_names)
	            and config.get('args.fill_holes', True)):
	        holes = fill_holes(config, dfs[SymbolDF.name], dfs[SectionDF.name])
	        dfs.update(holes)

	    # Stage 3: add synthetic columns (e.g. 'region') and prune by them. This
	    # follows fill_holes() so the gap symbols receive the values as well.
	    for name in table_names:
	        if name not in dfs:
	            continue
	        for column in memdf.select.SYNTHETIC_CHOICES:
	            dfs[name] = memdf.select.synthesize_column(
	                config, dfs[name], column)
	            dfs[name] = memdf.select.select_configured_column(
	                config, dfs[name], column)

	    # Record, per table, which columns hold symbol names and addresses.
	    for df in dfs.values():
	        if demangle := {c for c in df.columns if c.endswith('symbol')}:
	            df.attrs['demangle'] = demangle
	        if hexify := {c for c in df.columns if c.endswith('address')}:
	            df.attrs['hexify'] = hexify


	# Signature shared by the readers below; collect_files() calls them as
	# reader(config, filename, method).
	FileReader = Callable[[Config, str, str], DFs]

	# Reader used for each 'collect.method' value. 'csv' and 'tsv' share one
	# reader, and 'su' maps to a read_dir function rather than a read_file one.
	FILE_READERS: Dict[str, FileReader] = {
	    'bloaty': memdf.collector.bloaty.read_file,
	    'elftools': memdf.collector.elftools.read_file,
	    'readelf': memdf.collector.readelf.read_file,
	    'csv': memdf.collector.csv.read_file,
	    'tsv': memdf.collector.csv.read_file,
	    'su': memdf.collector.su.read_dir,
	}


	def collect_files(config: Config,
	                  files: Optional[List[str]] = None,
	                  method: Optional[str] = None) -> DFs:
	    """Read a filtered memory map from a set of files.

	    Args:
	        config: Configuration passed to the reader and the postprocessors.
	        files: Inputs to read; when empty or None the 'args.inputs'
	            configuration value is used instead.
	        method: Key into FILE_READERS; when None the 'collect.method'
	            configuration value is used, falling back to 'csv'.

	    Returns:
	        One table per table name, concatenated across all inputs and run
	        through postprocess_collected().
	    """
	    filenames = files or config.get('args.inputs', [])
	    if method is None:
	        method = config.get('collect.method', 'csv')

	    # Group the per-file tables by table name.
	    collected: Dict[str, List[DF]] = {}
	    for filename in filenames:
	        per_file: DFs = FILE_READERS[method](config, filename, method)
	        postprocess_file(config, per_file)
	        for key, frame in per_file.items():
	            collected.setdefault(key, []).append(frame)

	    merged = {
	        key: pd.concat(parts, ignore_index=True)
	        for key, parts in collected.items()
	    }
	    postprocess_collected(config, merged)
	    return merged


	def parse_args(config_desc: Mapping, argv: Sequence[str]) -> Config:
	    """Common argument parsing for collection tools.

	    The configuration description is assembled from the generic memdf
	    options, this module's CONFIG and the caller's *config_desc*, in that
	    order, so the caller's entries win on key clashes. One or more
	    positional 'inputs' (shown as FILE) are required.

	    Returns:
	        The result of parsing *argv* with that configuration.
	    """
	    blank = Config()
	    description = {
	        **memdf.util.config.CONFIG,
	        **CONFIG,
	        **config_desc,
	    }
	    config = blank.init(description)
	    config.argparse.add_argument('inputs', metavar='FILE', nargs='+')
	    return config.parse(argv)