blob: d70843e0ffbc400ab36fe422d9c8c771cd5a3e59 [file] [log] [blame]
Daniel Leunga5ab1a72021-04-02 12:54:53 -07001#!/usr/bin/env python3
2#
3# Copyright (c) 2020 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Dictionary-based Logging Database Generator
9
10This takes the built Zephyr ELF binary and produces a JSON database
11file for dictionary-based logging. This database is used together
12with the parser to decode binary log messages.
13"""
14
15import argparse
16import logging
17import os
Torsten Rasmussen91709772022-02-04 10:27:13 +010018import re
Daniel Leungcaca5482022-01-12 11:56:05 -080019import string
Daniel Leunga5ab1a72021-04-02 12:54:53 -070020import struct
21import sys
22
David Leachf5b0f922021-06-23 15:18:22 -050023import dictionary_parser.log_database
24from dictionary_parser.log_database import LogDatabase
Daniel Leungcaca5482022-01-12 11:56:05 -080025from dictionary_parser.utils import extract_one_string_in_section
Daniel Leung2e2c8cc2022-04-07 14:41:59 -070026from dictionary_parser.utils import find_string_in_mappings
Daniel Leunga5ab1a72021-04-02 12:54:53 -070027
28import elftools
Daniel Leungcaca5482022-01-12 11:56:05 -080029from elftools.elf.constants import SH_FLAGS
Daniel Leunga5ab1a72021-04-02 12:54:53 -070030from elftools.elf.elffile import ELFFile
31from elftools.elf.descriptions import describe_ei_data
32from elftools.elf.sections import SymbolTableSection
Daniel Leungcaca5482022-01-12 11:56:05 -080033from elftools.dwarf.descriptions import (
34 describe_DWARF_expr
35)
36from elftools.dwarf.locationlists import (
37 LocationExpr, LocationParser
38)
Daniel Leunga5ab1a72021-04-02 12:54:53 -070039
40
41LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
42logger = logging.getLogger(os.path.basename(sys.argv[0]))
43
44
45# Sections that contains static strings
Daniel Leung0c322562022-01-14 14:35:11 -080046STATIC_STRING_SECTIONS = [
47 'rodata',
48 '.rodata',
49 'pinned.rodata',
Daniel Leungcaca5482022-01-12 11:56:05 -080050]
51
52
53# Regulation expression to match DWARF location
54DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)")
55
56
57# Format string for pointers (default for 32-bit pointers)
58PTR_FMT = '0x%08x'
59
60
61# Potential string encodings. Add as needed.
62STR_ENCODINGS = [
63 'ascii',
64 'iso-8859-1',
65]
66
67
68# List of acceptable escape character
69ACCEPTABLE_ESCAPE_CHARS = [
70 b'\r',
71 b'\n',
Daniel Leung0c322562022-01-14 14:35:11 -080072]
Daniel Leunga5ab1a72021-04-02 12:54:53 -070073
74
75def parse_args():
76 """Parse command line arguments"""
77 argparser = argparse.ArgumentParser()
78
79 argparser.add_argument("elffile", help="Zephyr ELF binary")
Daniel Leunga5ab1a72021-04-02 12:54:53 -070080 argparser.add_argument("--build", help="Build ID")
Torsten Rasmussen91709772022-02-04 10:27:13 +010081 argparser.add_argument("--build-header",
82 help="Header file containing BUILD_VERSION define")
Daniel Leunga5ab1a72021-04-02 12:54:53 -070083 argparser.add_argument("--debug", action="store_true",
84 help="Print extra debugging information")
85 argparser.add_argument("-v", "--verbose", action="store_true",
86 help="Print more information")
87
Daniel Leung1c9e89c2022-01-26 14:44:55 -080088 outfile_grp = argparser.add_mutually_exclusive_group(required=True)
89 outfile_grp.add_argument("--json",
90 help="Output Dictionary Logging Database file in JSON")
Daniel Leung0c600d72022-01-27 11:26:09 -080091 outfile_grp.add_argument("--syst",
92 help="Output MIPI Sys-T Collateral XML file")
Daniel Leung1c9e89c2022-01-26 14:44:55 -080093
Daniel Leunga5ab1a72021-04-02 12:54:53 -070094 return argparser.parse_args()
95
96
Daniel Leungcaca5482022-01-12 11:56:05 -080097def extract_elf_code_data_sections(elf):
98 """Find all sections in ELF file"""
99 sections = {}
100
101 for sect in elf.iter_sections():
102 # Only Allocated sections with PROGBITS are included
103 # since they actually have code/data.
104 #
105 # On contrary, BSS is allocated but NOBITS.
106 if (
107 (sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC
108 and sect['sh_type'] == 'SHT_PROGBITS'
109 ):
110 sections[sect.name] = {
111 'name' : sect.name,
112 'size' : sect['sh_size'],
113 'start' : sect['sh_addr'],
114 'end' : sect['sh_addr'] + sect['sh_size'] - 1,
115 'data' : sect.data(),
116 }
117
118 return sections
119
120
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700121def find_elf_sections(elf, sh_name):
122 """Find all sections in ELF file"""
123 for section in elf.iter_sections():
124 if section.name == sh_name:
125 ret = {
126 'name' : section.name,
127 'size' : section['sh_size'],
128 'start' : section['sh_addr'],
129 'end' : section['sh_addr'] + section['sh_size'] - 1,
130 'data' : section.data(),
131 }
132
133 return ret
134
135 return None
136
137
138def get_kconfig_symbols(elf):
139 """Get kconfig symbols from the ELF file"""
140 for section in elf.iter_sections():
141 if isinstance(section, SymbolTableSection):
142 return {sym.name: sym.entry.st_value
143 for sym in section.iter_symbols()
144 if sym.name.startswith("CONFIG_")}
145
146 raise LookupError("Could not find symbol table")
147
148
149def find_log_const_symbols(elf):
150 """Extract all "log_const_*" symbols from ELF file"""
151 symbol_tables = [s for s in elf.iter_sections()
152 if isinstance(s, elftools.elf.sections.SymbolTableSection)]
153
154 ret_list = []
155
156 for section in symbol_tables:
157 if not isinstance(section, elftools.elf.sections.SymbolTableSection):
158 continue
159
160 if section['sh_entsize'] == 0:
161 continue
162
163 for symbol in section.iter_symbols():
164 if symbol.name.startswith("log_const_"):
165 ret_list.append(symbol)
166
167 return ret_list
168
169
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700170def parse_log_const_symbols(database, log_const_section, log_const_symbols, string_mappings):
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700171 """Find the log instances and map source IDs to names"""
172 if database.is_tgt_little_endian():
173 formatter = "<"
174 else:
175 formatter = ">"
176
177 if database.is_tgt_64bit():
178 # 64-bit pointer to string
179 formatter += "Q"
180 else:
181 # 32-bit pointer to string
182 formatter += "L"
183
184 # log instance level
185 formatter += "B"
186
187 datum_size = struct.calcsize(formatter)
188
189 # Get the address of first log instance
190 first_offset = log_const_symbols[0].entry['st_value']
191 for sym in log_const_symbols:
192 if sym.entry['st_value'] < first_offset:
193 first_offset = sym.entry['st_value']
194
195 first_offset -= log_const_section['start']
196
197 # find all log_const_*
198 for sym in log_const_symbols:
199 # Find data offset in log_const_section for this symbol
200 offset = sym.entry['st_value'] - log_const_section['start']
201
202 idx_s = offset
203 idx_e = offset + datum_size
204
205 datum = log_const_section['data'][idx_s:idx_e]
206
207 if len(datum) != datum_size:
208 # Not enough data to unpack
209 continue
210
211 str_ptr, level = struct.unpack(formatter, datum)
212
213 # Offset to rodata section for string
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700214 instance_name = find_string_in_mappings(string_mappings, str_ptr)
215 if instance_name is None:
216 instance_name = "unknown"
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700217
218 logger.info("Found Log Instance: %s, level: %d", instance_name, level)
219
220 # source ID is simply the element index in the log instance array
221 source_id = int((offset - first_offset) / sym.entry['st_size'])
222
223 database.add_log_instance(source_id, instance_name, level, sym.entry['st_value'])
224
225
226def extract_elf_information(elf, database):
227 """Extract information from ELF file and store in database"""
228 e_ident = elf.header['e_ident']
229 elf_data = describe_ei_data(e_ident['EI_DATA'])
230
231 if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']:
232 database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
233 elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']:
234 database.set_tgt_endianness(LogDatabase.BIG_ENDIAN)
235 else:
236 logger.error("Cannot determine endianness from ELF file, exiting...")
237 sys.exit(1)
238
239
240def process_kconfigs(elf, database):
241 """Process kconfigs to extract information"""
242 kconfigs = get_kconfig_symbols(elf)
243
244 # 32 or 64-bit target
245 database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32)
246
247 # Architecture
David Leachf5b0f922021-06-23 15:18:22 -0500248 for name, arch in dictionary_parser.log_database.ARCHS.items():
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700249 if arch['kconfig'] in kconfigs:
250 database.set_arch(name)
251 break
252
253 # Put some kconfigs into the database
254 #
255 # Use 32-bit timestamp? or 64-bit?
256 if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs:
257 database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT",
258 kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
259
260
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700261def extract_logging_subsys_information(elf, database, string_mappings):
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700262 """
263 Extract logging subsys related information and store in database.
264
265 For example, this extracts the list of log instances to establish
266 mapping from source ID to name.
267 """
268 # Extract log constant section for module names
269 section_log_const = find_elf_sections(elf, "log_const_sections")
270 if section_log_const is None:
271 # ESP32 puts "log_const_*" info log_static_section instead of log_const_sections
272 section_log_const = find_elf_sections(elf, "log_static_section")
273
274 if section_log_const is None:
275 logger.error("Cannot find section 'log_const_sections' in ELF file, exiting...")
276 sys.exit(1)
277
278 # Find all "log_const_*" symbols and parse them
279 log_const_symbols = find_log_const_symbols(elf)
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700280 parse_log_const_symbols(database, section_log_const, log_const_symbols, string_mappings)
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700281
282
Daniel Leungcaca5482022-01-12 11:56:05 -0800283def is_die_attr_ref(attr):
284 """
285 Returns True if the DIE attribute is a reference.
286 """
287 return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2',
288 'DW_FORM_ref4', 'DW_FORM_ref8',
289 'DW_FORM_ref'))
290
291
292def find_die_var_base_type(compile_unit, die, is_const):
293 """
294 Finds the base type of a DIE and returns the name.
295 If DW_AT_type is a reference, it will recursively go through
296 the references to find the base type. Returns None is no
297 base type is found.
298 """
299 # DIE is of base type. So extract the name.
300 if die.tag == 'DW_TAG_base_type':
301 return die.attributes['DW_AT_name'].value.decode('ascii'), is_const
302
303 # Not a type, cannot continue
304 if not 'DW_AT_type' in die.attributes:
305 return None, None
306
307 if die.tag == 'DW_TAG_const_type':
308 is_const = True
309
310 # DIE is probably a reference to another.
311 # If so, check if the reference is a base type.
312 type_attr = die.attributes['DW_AT_type']
313 if is_die_attr_ref(type_attr):
314 ref_addr = compile_unit.cu_offset + type_attr.raw_value
315 ref_die = compile_unit.get_DIE_from_refaddr(ref_addr)
316
317 return find_die_var_base_type(compile_unit, ref_die, is_const)
318
319 # Not a base type, and not reference
320 return None, None
321
322
323def is_die_var_const_char(compile_unit, die):
324 """
325 Returns True if DIE of type variable is const char.
326 """
327 var_type, is_const = find_die_var_base_type(compile_unit, die, False)
328
329 if var_type is not None and var_type.endswith('char') and is_const:
330 return True
331
332 return False
333
334
335def extract_string_variables(elf):
336 """
337 Find all string variables (char) in all Compilation Units and
338 Debug information Entry (DIE) in ELF file.
339 """
340 dwarf_info = elf.get_dwarf_info()
341 loc_lists = dwarf_info.location_lists()
342 loc_parser = LocationParser(loc_lists)
343
344 strings = []
345
346 # Loop through all Compilation Units and
347 # Debug information Entry (DIE) to extract all string variables
348 for compile_unit in dwarf_info.iter_CUs():
349 for die in compile_unit.iter_DIEs():
350 # Only care about variables with location information
351 # and of type "char"
352 if die.tag == 'DW_TAG_variable':
353 if ('DW_AT_type' in die.attributes
354 and 'DW_AT_location' in die.attributes
355 and is_die_var_const_char(compile_unit, die)
356 ):
357 # Extract location information, which is
358 # its address in memory.
359 loc_attr = die.attributes['DW_AT_location']
360 if loc_parser.attribute_has_location(loc_attr, die.cu['version']):
361 loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'])
362 if isinstance(loc, LocationExpr):
363 try:
364 addr = describe_DWARF_expr(loc.loc_expr,
365 dwarf_info.structs)
366
367 matcher = DT_LOCATION_REGEX.match(addr)
368 if matcher:
369 addr = int(matcher.group(1), 16)
370 if addr > 0:
371 strings.append({
372 'name': die.attributes['DW_AT_name'].value,
373 'addr': addr,
374 'die': die
375 })
376 except KeyError:
377 pass
378
379 return strings
380
381
382def try_decode_string(str_maybe):
383 """Check if it is a printable string"""
384 for encoding in STR_ENCODINGS:
385 try:
386 decoded_str = str_maybe.decode(encoding)
387
388 # Check if string is printable according to Python
389 # since the parser (written in Python) will need to
390 # print the string.
391 #
392 # Note that '\r' and '\n' are not included in
393 # string.printable so they need to be checked separately.
394 printable = True
395 for one_char in decoded_str:
396 if (one_char not in string.printable
397 and one_char not in ACCEPTABLE_ESCAPE_CHARS):
398 printable = False
399 break
400
401 if printable:
402 return decoded_str
403 except UnicodeDecodeError:
404 pass
405
406 return None
407
408
409def extract_strings_in_one_section(section, str_mappings):
410 """Extract NULL-terminated strings in one ELF section"""
411 bindata = section['data']
412
413 if len(bindata) < 2:
414 # Can't have a NULL-terminated string with fewer than 2 bytes.
415 return str_mappings
416
417 idx = 0
418
419 # If first byte is not NULL, it may be a string.
420 if bindata[0] == 0:
421 start = None
422 else:
423 start = 0
424
425 while idx < len(bindata):
426 if start is None:
427 if bindata[idx] == 0:
428 # Skip NULL bytes to find next string
429 idx += 1
430 else:
431 # Beginning of possible string
432 start = idx
433 idx += 1
434 else:
435 if bindata[idx] != 0:
436 # Skipping till next NULL byte for possible string
437 idx += 1
438 else:
439 # End of possible string
440 end = idx
441
442 if start != end:
443 str_maybe = bindata[start:end]
444 decoded_str = try_decode_string(str_maybe)
445
446 # Only store readable string
447 if decoded_str is not None:
448 addr = section['start'] + start
449
450 if addr not in str_mappings:
451 str_mappings[addr] = decoded_str
452
453 # Decoded string may contain un-printable characters
454 # (e.g. extended ASC-II characters) or control
455 # characters (e.g. '\r' or '\n'), so simply print
456 # the byte string instead.
457 logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
458 addr, str_maybe)
459
460 # GCC-based toolchain will reuse the NULL character
461 # for empty strings. There is no way to know which
462 # one is being reused, so just treat all NULL character
463 # at the end of legitimate strings as empty strings.
464 null_addr = section['start'] + end
465 str_mappings[null_addr] = ''
466
467 logger.debug('Found null string via extraction at ' + PTR_FMT,
468 null_addr)
469
470 start = None
471 idx += 1
472
473 return str_mappings
474
475
Daniel Leung0c600d72022-01-27 11:26:09 -0800476def extract_static_strings(elf, database, section_extraction=False):
Daniel Leungcaca5482022-01-12 11:56:05 -0800477 """
478 Extract static strings from ELF file using DWARF,
479 and also extraction from binary data.
480 """
481 string_mappings = {}
482
483 elf_sections = extract_elf_code_data_sections(elf)
484
485 # Extract strings using ELF DWARF information
486 str_vars = extract_string_variables(elf)
487 for str_var in str_vars:
488 for _, sect in elf_sections.items():
489 one_str = extract_one_string_in_section(sect, str_var['addr'])
490 if one_str is not None:
491 string_mappings[str_var['addr']] = one_str
492 logger.debug('Found string variable at ' + PTR_FMT + ': %s',
493 str_var['addr'], one_str)
494 break
495
Daniel Leung0c600d72022-01-27 11:26:09 -0800496 if section_extraction:
497 # Extract strings from ELF sections
498 string_sections = STATIC_STRING_SECTIONS
499 rawstr_map = {}
Daniel Leungcaca5482022-01-12 11:56:05 -0800500
Daniel Leung0c600d72022-01-27 11:26:09 -0800501 # Some architectures may put static strings into additional sections.
502 # So need to extract them too.
503 arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()]
504 if "extra_string_section" in arch_data:
505 string_sections.extend(arch_data['extra_string_section'])
Daniel Leungcaca5482022-01-12 11:56:05 -0800506
Daniel Leung0c600d72022-01-27 11:26:09 -0800507 for sect_name in string_sections:
508 if sect_name in elf_sections:
509 rawstr_map = extract_strings_in_one_section(elf_sections[sect_name],
510 rawstr_map)
511
512 for one_str in rawstr_map:
513 if one_str not in string_mappings:
514 string_mappings[one_str] = rawstr_map[one_str]
Daniel Leungcaca5482022-01-12 11:56:05 -0800515
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700516 return string_mappings
Daniel Leungcaca5482022-01-12 11:56:05 -0800517
518
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700519def main():
520 """Main function of database generator"""
521 args = parse_args()
522
523 # Setup logging
Daniel Leungcaca5482022-01-12 11:56:05 -0800524 logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING)
525 if args.debug:
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700526 logger.setLevel(logging.DEBUG)
Daniel Leungcaca5482022-01-12 11:56:05 -0800527 elif args.verbose:
528 logger.setLevel(logging.INFO)
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700529
530 elffile = open(args.elffile, "rb")
531 if not elffile:
532 logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile)
533 sys.exit(1)
534
535 logger.info("ELF file %s", args.elffile)
Daniel Leung1c9e89c2022-01-26 14:44:55 -0800536
537 if args.json:
538 logger.info("JSON Database file %s", args.json)
Daniel Leung0c600d72022-01-27 11:26:09 -0800539 section_extraction = True
Daniel Leung0c600d72022-01-27 11:26:09 -0800540
541 if args.syst:
542 logger.info("MIPI Sys-T Collateral file %s", args.syst)
543 section_extraction = False
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700544
545 elf = ELFFile(elffile)
546
547 database = LogDatabase()
548
Torsten Rasmussen91709772022-02-04 10:27:13 +0100549 if args.build_header:
550 with open(args.build_header) as f:
551 for l in f:
552 match = re.match(r'\s*#define\s+BUILD_VERSION\s+(.*)', l)
553 if match:
554 database.set_build_id(match.group(1))
555 break
556
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700557 if args.build:
558 database.set_build_id(args.build)
559 logger.info("Build ID: %s", args.build)
560
561 extract_elf_information(elf, database)
562
563 process_kconfigs(elf, database)
564
565 logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
566 if database.is_tgt_little_endian():
567 logger.info("Endianness: Little")
568 else:
569 logger.info("Endianness: Big")
570
Daniel Leungcaca5482022-01-12 11:56:05 -0800571 if database.is_tgt_64bit():
572 global PTR_FMT
573 PTR_FMT = '0x%016x'
574
575 # Extract strings from ELF files
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700576 string_mappings = extract_static_strings(elf, database, section_extraction)
577 if len(string_mappings) > 0:
578 database.set_string_mappings(string_mappings)
579 logger.info("Found %d strings", len(string_mappings))
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700580
581 # Extract information related to logging subsystem
Daniel Leung2e2c8cc2022-04-07 14:41:59 -0700582 if not section_extraction:
583 # The logging subsys information (e.g. log module names)
584 # may require additional strings outside of those extracted
585 # via ELF DWARF variables. So generate a new string mappings
586 # with strings in various ELF sections.
587 string_mappings = extract_static_strings(elf, database, section_extraction=True)
588
589 extract_logging_subsys_information(elf, database, string_mappings)
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700590
591 # Write database file
Daniel Leung1c9e89c2022-01-26 14:44:55 -0800592 if args.json:
593 if not LogDatabase.write_json_database(args.json, database):
594 logger.error("ERROR: Cannot open database file for write: %s, exiting...",
595 args.json)
596 sys.exit(1)
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700597
Daniel Leung0c600d72022-01-27 11:26:09 -0800598 if args.syst:
599 if not LogDatabase.write_syst_database(args.syst, database):
600 logger.error("ERROR: Cannot open database file for write: %s, exiting...",
601 args.syst)
602 sys.exit(1)
603
Daniel Leunga5ab1a72021-04-02 12:54:53 -0700604 elffile.close()
605
606
607if __name__ == "__main__":
608 main()