#!/usr/bin/env python3
#
# Copyright (c) 2017 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
| 6 | |
| 7 | import sys |
| 8 | import argparse |
| 9 | import pprint |
| 10 | import os |
| 11 | import struct |
| 12 | from distutils.version import LooseVersion |
| 13 | |
| 14 | import elftools |
| 15 | from elftools.elf.elffile import ELFFile |
| 16 | from elftools.dwarf import descriptions |
| 17 | from elftools.elf.sections import SymbolTableSection |
| 18 | |
# Refuse to run with a pyelftools release older than 0.24: report the
# problem on stderr and exit with status 1 at import time.
if LooseVersion(elftools.__version__) < LooseVersion('0.24'):
    sys.stderr.write("pyelftools is out of date, need version 0.24 or later\n")
    sys.exit(1)
| 22 | |
# Maps the name of each kernel object struct (as it appears in the DWARF
# DW_AT_name of a structure type) to the K_OBJ_* identifier that is written
# into the generated gperf table for instances of that struct.
kobjects = {
    "k_alert" : "K_OBJ_ALERT",
    "k_delayed_work" : "K_OBJ_DELAYED_WORK",
    "k_mem_slab" : "K_OBJ_MEM_SLAB",
    "k_msgq" : "K_OBJ_MSGQ",
    "k_mutex" : "K_OBJ_MUTEX",
    "k_pipe" : "K_OBJ_PIPE",
    "k_sem" : "K_OBJ_SEM",
    "k_stack" : "K_OBJ_STACK",
    "k_thread" : "K_OBJ_THREAD",
    "k_timer" : "K_OBJ_TIMER",
    "k_work" : "K_OBJ_WORK",
    "k_work_q" : "K_OBJ_WORK_Q",
}
| 37 | |
# Location-expression opcodes compared against the first byte of a
# variable's DW_AT_location when resolving its address.
DW_OP_addr = 0x3
DW_OP_fbreg = 0x91

# Global type environment. Populated by pass 1.
# Keys are DIE offsets; values are ArrayType, AggregateType or KobjectType
# instances describing the type found at that offset.
type_env = {}

# --- debug stuff ---

# Base name of the running script, used as a prefix on diagnostic output.
scr = os.path.basename(sys.argv[0])
| 47 | |
def debug(text):
    """Write `text` to stdout, prefixed with the script name.

    Nothing is printed unless the parsed command line has `verbose` set.
    """
    if args.verbose:
        sys.stdout.write("".join((scr, ": ", text, "\n")))
| 52 | |
def error(text):
    """Report `text` on stderr as an error and terminate with exit status 1."""
    message = "%s ERROR: %s\n" % (scr, text)
    sys.stderr.write(message)
    sys.exit(1)
| 56 | |
def debug_die(die, text):
    """Emit a three-line debug report about `die`.

    The report consists of the DIE's string form, the source file and line
    where it was declared, and the caller-supplied `text`.
    """
    src_path, src_line = get_filename_lineno(die)

    debug(str(die))
    debug("File '%s', line %d:" % (src_path, src_line))
    debug(" %s" % text)
| 63 | |
| 64 | |
| 65 | # --- type classes ---- |
| 66 | |
class ArrayType:
    """An array type recorded in the global type environment.

    Attributes:
        offset: value supplied by the caller to identify this type (the
            array DIE's offset when built by analyze_die_array).
        num_members: total number of elements in the array.
        member_type: key into `type_env` for the element type.
    """

    def __init__(self, offset, num_members, member_type):
        self.offset = offset
        self.member_type = member_type
        self.num_members = num_members

    def __repr__(self):
        described = (self.member_type, self.num_members)
        return "<array of %d, size %d>" % described

    def has_kobject(self):
        """Return True when the element type is known and holds kernel objects."""
        if self.member_type in type_env:
            return type_env[self.member_type].has_kobject()

        return False

    def get_kobjects(self, addr):
        """Return the kernel objects of every element of an array at `addr`.

        Elements are assumed to be laid out back to back, each occupying
        `size` bytes of the element type.
        """
        element = type_env[self.member_type]
        return [obj
                for index in range(self.num_members)
                for obj in element.get_kobjects(addr + (index * element.size))]
| 89 | |
| 90 | |
class AggregateTypeMember:
    """One member of a struct recorded in the global type environment.

    Attributes:
        member_name: name of the member.
        member_type: key into `type_env` for the member's type.
        member_offset: value added to the struct's address to reach the
            member.

    The `offset` constructor argument is accepted but not stored.
    """

    def __init__(self, offset, member_name, member_type, member_offset):
        self.member_offset = member_offset
        self.member_type = member_type
        self.member_name = member_name

    def __repr__(self):
        fields = (self.member_name, self.member_type, self.member_offset)
        return "<member %s, type %d, offset %d>" % fields

    def has_kobject(self):
        """Return True when the member's type is known and holds kernel objects."""
        if self.member_type in type_env:
            return type_env[self.member_type].has_kobject()

        return False

    def get_kobjects(self, addr):
        """Return the kernel objects inside this member of a struct at `addr`."""
        member_addr = addr + self.member_offset
        return type_env[self.member_type].get_kobjects(member_addr)
| 110 | |
| 111 | |
class AggregateType:
    """A struct type whose members may contain kernel objects.

    Attributes:
        name: struct name.
        size: size of the struct in bytes.
        offset: value supplied by the caller to identify this type.
        members: member objects added through add_member().
    """

    def __init__(self, offset, name, size):
        self.offset = offset
        self.name = name
        self.size = size
        self.members = []

    def add_member(self, member):
        """Append `member` to this struct's member list."""
        self.members.append(member)

    def __repr__(self):
        return "<struct %s, with %s>" % (self.name, self.members)

    def has_kobject(self):
        """Return True if at least one member holds a kernel object.

        As a side effect, members that hold no kernel object are removed
        from `self.members` so they need not be considered again.
        """
        useful = []
        useless = []

        for member in self.members:
            bucket = useful if member.has_kobject() else useless
            bucket.append(member)

        # Prune in place so later walks only visit interesting members
        for member in useless:
            self.members.remove(member)

        return bool(useful)

    def get_kobjects(self, addr):
        """Return the kernel objects of all members of a struct at `addr`."""
        return [obj
                for member in self.members
                for obj in member.get_kobjects(addr)]
| 147 | |
| 148 | |
class KobjectType:
    """A struct type that is itself a kernel object.

    Attributes:
        name: struct name; used as the key into the `kobjects` table.
        size: size of the struct in bytes.
        offset: value supplied by the caller to identify this type.
    """

    def __init__(self, offset, name, size):
        self.offset = offset
        self.name = name
        self.size = size

    def __repr__(self):
        return "<kobject %s>" % self.name

    def has_kobject(self):
        """A kernel object type always counts as holding a kernel object."""
        return True

    def get_kobjects(self, addr):
        """Return a one-element list pairing `addr` with this object's type ID."""
        type_id = kobjects[self.name]
        return [(addr, type_id)]
| 163 | |
| 164 | # --- helper functions for getting data from DIEs --- |
| 165 | |
def die_get_name(die):
    """Return the DIE's DW_AT_name decoded as UTF-8, or None if it has none."""
    attrs = die.attributes
    if "DW_AT_name" in attrs:
        return attrs["DW_AT_name"].value.decode("utf-8")
    return None
| 170 | |
| 171 | |
def die_get_type_offset(die):
    """Return the offset of the DIE's type, used as a key into `type_env`.

    The DW_AT_type value is combined with the offset of the DIE's
    compilation unit. Raises KeyError if the DIE has no DW_AT_type.
    """
    cu_relative = die.attributes["DW_AT_type"].value
    return cu_relative + die.cu.cu_offset
| 174 | |
| 175 | |
def die_get_byte_size(die):
    """Return the DIE's DW_AT_byte_size value, or 0 when it is absent."""
    attrs = die.attributes
    if "DW_AT_byte_size" in attrs:
        return attrs["DW_AT_byte_size"].value

    return 0
| 181 | |
def analyze_die_struct(die):
    """Record a structure type DIE in the global type environment.

    Structs whose name appears in `kobjects` are stored as KobjectType;
    every other struct is stored as an AggregateType together with its
    members. Structs without a size (incomplete types) are ignored.

    Args:
        die: a DW_TAG_structure_type DIE.
    """
    name = die_get_name(die) or "<anon>"
    offset = die.offset
    size = die_get_byte_size(die)

    # Incomplete type
    if not size:
        return

    if name in kobjects:
        type_env[offset] = KobjectType(offset, name, size)
        return

    at = AggregateType(offset, name, size)
    type_env[offset] = at

    for child in die.iter_children():
        if child.tag != "DW_TAG_member":
            continue

        # Members without a byte location (e.g. bit-fields described only
        # by a bit offset, or static members) cannot be addressed as
        # struct offset + member offset, so they are skipped instead of
        # raising KeyError and aborting the whole scan.
        if "DW_AT_data_member_location" not in child.attributes:
            continue

        child_type = die_get_type_offset(child)
        member_offset = child.attributes["DW_AT_data_member_location"].value
        cname = die_get_name(child) or "<anon>"
        m = AggregateTypeMember(child.offset, cname, child_type,
                                member_offset)
        at.add_member(m)
| 208 | |
| 209 | |
def analyze_die_array(die):
    """Record an array type DIE in the global type environment.

    The element count is the product of (upper bound + 1) over every
    subrange child whose upper bound is stored in a DW_FORM_data* form.
    Arrays with no such subrange are not recorded.

    Args:
        die: a DW_TAG_array_type DIE.
    """
    element_type = die_get_type_offset(die)
    dimensions = []

    for child in die.iter_children():
        if child.tag != "DW_TAG_subrange_type":
            continue
        if "DW_AT_upper_bound" not in child.attributes:
            continue

        bound = child.attributes["DW_AT_upper_bound"]
        # Only constant bounds can be turned into an element count
        if bound.form.startswith("DW_FORM_data"):
            dimensions.append(bound.value + 1)

    if not dimensions:
        return

    total = 1
    for extent in dimensions:
        total = total * extent

    type_env[die.offset] = ArrayType(die.offset, total, element_type)
| 232 | |
| 233 | |
def get_filename_lineno(die):
    """Return (path, line number) of the source declaration of `die`.

    The file name and directory are looked up in the header of the line
    program belonging to the DIE's compilation unit.

    Args:
        die: a DIE carrying DW_AT_decl_file and DW_AT_decl_line.

    Returns:
        A (path, lineno) tuple. The path is the include directory joined
        with the file name, or the bare file name when the file entry
        refers to the compilation directory.

    Raises:
        KeyError: if the DIE lacks DW_AT_decl_file or DW_AT_decl_line.
    """
    lp_header = die.dwarfinfo.line_program_for_CU(die.cu).header
    files = lp_header["file_entry"]
    includes = lp_header["include_directory"]

    # File numbers in DW_AT_decl_file are 1-based
    fileinfo = files[die.attributes["DW_AT_decl_file"].value - 1]
    filename = fileinfo.name.decode("utf-8")

    if fileinfo.dir_index > 0:
        # Directory indices are 1-based as well
        filedir = includes[fileinfo.dir_index - 1].decode("utf-8")
        path = os.path.join(filedir, filename)
    else:
        # Index 0 denotes the compilation directory, which is not part of
        # the include list; indexing with -1 would pick an unrelated
        # directory or fail on an empty list.
        path = filename

    lineno = die.attributes["DW_AT_decl_line"].value
    return (path, lineno)
| 246 | |
| 247 | |
def find_kobjects(elf, syms):
    """Find the statically allocated kernel objects described by `elf`.

    Works in three steps: collect struct/array type information and all
    variable DIEs from the DWARF data, prune the type environment down to
    types that contain kernel objects, then resolve the address of every
    variable of such a type.

    Args:
        elf: the ELF file object to scan; must carry DWARF information.
        syms: mapping of symbol name to address. Must contain
            "__kernel_ram_start" and "__kernel_ram_end".

    Returns:
        A list of (address, kernel object type identifier) tuples.

    Exits the process with status 1 if the ELF has no DWARF information.
    """
    if not elf.has_dwarf_info():
        sys.stderr.write("ELF file has no DWARF information\n")
        sys.exit(1)

    kram_start = syms["__kernel_ram_start"]
    kram_end = syms["__kernel_ram_end"]

    di = elf.get_dwarf_info()

    variables = []

    # Step 1: collect all type information.
    for CU in di.iter_CUs():
        for die in CU.iter_DIEs():
            # Unions are disregarded, kernel objects should never be union
            # members since the memory is not dedicated to that object and
            # could be something else
            if die.tag == "DW_TAG_structure_type":
                analyze_die_struct(die)
            elif die.tag == "DW_TAG_array_type":
                analyze_die_array(die)
            elif die.tag == "DW_TAG_variable":
                variables.append(die)

    # Step 2: filter type_env to only contain kernel objects, or structs and
    # arrays of kernel objects. Offsets are collected first because the
    # dictionary must not change size while it is being iterated.
    bad_offsets = [offset for offset, type_object in type_env.items()
                   if not type_object.has_kobject()]

    for offset in bad_offsets:
        del type_env[offset]

    # Step 3: Now that we know all the types we are looking for, examine
    # all variables
    all_objs = []

    # Gross hack, see below
    work_q_found = False

    for die in variables:
        name = die_get_name(die)
        if not name:
            continue

        type_offset = die_get_type_offset(die)

        # Is this a kernel object, or a structure containing kernel objects?
        if type_offset not in type_env:
            continue

        if "DW_AT_declaration" in die.attributes:
            # FIXME: why does k_sys_work_q not resolve an address in the DWARF
            # data??? Every single instance it finds is an extern definition
            # but not the actual instance in system_work_q.c
            # Is there something weird about how lib-y stuff is linked?
            if name == "k_sys_work_q" and not work_q_found and name in syms:
                addr = syms[name]
                work_q_found = True
            else:
                continue
        else:
            if "DW_AT_location" not in die.attributes:
                debug_die(die, "No location information for object '%s'; possibly stack allocated"
                          % name)
                continue

            loc = die.attributes["DW_AT_location"]
            if loc.form != "DW_FORM_exprloc":
                debug_die(die, "kernel object '%s' unexpected location format" % name)
                continue

            opcode = loc.value[0]
            if opcode != DW_OP_addr:

                # Check if frame pointer offset DW_OP_fbreg
                if opcode == DW_OP_fbreg:
                    debug_die(die, "kernel object '%s' found on stack" % name)
                else:
                    debug_die(die, "kernel object '%s' unexpected exprloc opcode %s"
                              % (name, hex(opcode)))
                continue

            # The four bytes after the opcode are combined least
            # significant byte first.
            # NOTE(review): this assumes a 32-bit little-endian address
            # operand - confirm for other targets.
            addr = (loc.value[1] | (loc.value[2] << 8) | (loc.value[3] << 16) |
                    (loc.value[4] << 24))

        if addr < kram_start or addr >= kram_end:
            if addr == 0:
                # Never linked; gc-sections deleted it
                continue

            debug_die(die, "object '%s' found in invalid location %s" %
                      (name, hex(addr)))
            continue

        type_obj = type_env[type_offset]
        objs = type_obj.get_kobjects(addr)
        all_objs.extend(objs)

        debug("symbol '%s' at %s contains %d object(s)" % (name, hex(addr),
                                                          len(objs)))

    debug("found %d kernel object instances total" % len(all_objs))
    return all_objs
| 357 | |
| 358 | |
# Text written at the start of the generated gperf input: gperf
# declarations (including the name of the generated lookup function) and
# the C includes, terminated by the "%%" separator.
header = """%compare-lengths
%define lookup-function-name _k_object_lookup
%language=ANSI-C
%struct-type
%{
#include <kernel.h>
#include <string.h>
%}
struct _k_object;
%%
"""


# Different versions of gperf have different prototypes for the lookup function,
# best to implement the wrapper here. The pointer value itself is turned into
# a string, we told gperf to expect binary strings that are not NULL-terminated.
# This text is written after the last table entry.
footer = """%%
struct _k_object *_k_object_find(void *obj)
{
return _k_object_lookup((const char *)obj, sizeof(void *));
}
"""
| 381 | |
| 382 | |
def write_gperf_table(fp, objs, static_begin, static_end):
    """Write the gperf input describing all kernel objects to `fp`.

    Each object becomes one line whose key is its address packed as a
    4-byte unsigned integer in the target's byte order and spelled as
    \\xNN escapes, followed by its type and flags.

    Args:
        fp: writable text file object.
        objs: iterable of (address, object type string) pairs.
        static_begin: start of the pre-initialized object range (inclusive).
        static_end: end of the pre-initialized object range (exclusive).
    """
    fp.write(header)

    for obj_addr, obj_type in objs:
        # pre-initialized objects fall within this memory range, they are
        # either completely initialized at build time, or done automatically
        # at boot during some PRE_KERNEL_* phase
        if static_begin <= obj_addr < static_end:
            flags = "K_OBJ_FLAG_INITIALIZED"
        else:
            flags = "0"

        pack_format = "<I" if args.little_endian else ">I"
        key = "".join("\\x%02x" % byte
                      for byte in struct.pack(pack_format, obj_addr))

        fp.write("\"" + key + "\",{},%s,%s\n" % (obj_type, flags))

    fp.write(footer)
| 402 | |
| 403 | |
def get_symbols(obj):
    """Return a dict mapping symbol names to their values.

    Only the first symbol table section found in `obj` is used.

    Raises:
        LookupError: if `obj` has no symbol table section.
    """
    for section in obj.iter_sections():
        if not isinstance(section, SymbolTableSection):
            continue

        table = {}
        for sym in section.iter_symbols():
            table[sym.name] = sym.entry.st_value
        return table

    raise LookupError("Could not find symbol table")
| 411 | |
| 412 | |
def parse_args():
    """Parse the command line and store the result in the global `args`.

    Options: -k/--kernel (required), -o/--output (required) and
    -v/--verbose (flag).
    """
    global args

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    option_table = (
        (("-k", "--kernel"),
         {"required": True, "help": "Input zephyr ELF binary"}),
        (("-o", "--output"),
         {"required": True,
          "help": "Output list of kernel object addresses for gperf use"}),
        (("-v", "--verbose"),
         {"action": "store_true",
          "help": "Print extra debugging information"}),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args()
| 426 | |
| 427 | |
def main():
    """Script entry point.

    Parses the command line, scans the kernel ELF for kernel objects and
    writes the resulting gperf table to the output file.
    """
    parse_args()

    with open(args.kernel, "rb") as elf_file:
        elf = ELFFile(elf_file)
        # Remembered so the table writer can pack addresses in target order
        args.little_endian = elf.little_endian
        syms = get_symbols(elf)
        objs = find_kobjects(elf, syms)

    with open(args.output, "w") as out_file:
        write_gperf_table(out_file, objs,
                          syms["_static_kernel_objects_begin"],
                          syms["_static_kernel_objects_end"])


if __name__ == "__main__":
    main()
| 443 | |