| #!/usr/bin/env python3 |
| |
| """ |
| strip_asm.py - Clean up ASM output for the specified file |
| """ |
| |
| import os |
| import re |
| import sys |
| from argparse import ArgumentParser |
| |
| |
def find_used_labels(asm):
    """
    Collect the local labels that are the operand of a jump instruction.

    Each line of `asm` is checked on its own: a line that starts (after
    optional whitespace) with a mnemonic beginning with ``j`` followed by a
    ``.L``-prefixed operand contributes that operand to the result.

    Returns a set of label names, each including its ``.L`` prefix.
    """
    jump_target = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    hits = (jump_target.match(text) for text in asm.splitlines())
    return {".L" + hit.group(1) for hit in hits if hit}
| |
| |
def normalize_labels(asm):
    """
    Give every local label in `asm` a leading dot (``L1`` -> ``.L1``).

    A local label is recognized by its declaration: a line that starts with
    an optional ``.``, then ``L``, an alphanumeric character, any number of
    alphanumerics/underscores, and a ``:``. Every label declared *without*
    the dot is rewritten, both at its declaration and wherever the name
    appears as a whole whitespace-delimited token (preceded by the start of
    the text or whitespace, followed by ``:``, whitespace, or the end of the
    text).

    Labels that are already dotted are left alone, so input that already
    uses ``.L`` labels is returned unchanged. The result does not depend on
    set iteration order, even if dotted and undotted declarations are mixed.

    Returns the rewritten assembly text.
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    undotted = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m and not m.group(0).startswith("."):
            undotted.add(m.group(0))
    if not undotted:
        return asm

    # One alternation handles every label in a single pass over the text.
    # The trailing lookahead forces a whole-token match, so "L1" can never
    # match inside "L10"; sorting just keeps the pattern deterministic.
    alternatives = "|".join(
        re.escape(ld) for ld in sorted(undotted, key=lambda s: (-len(s), s))
    )
    # \Z lets a reference on the final line match even when the text has no
    # trailing newline.
    label_use = re.compile(r"(^|\s+)(" + alternatives + r")(?=:|\s|\Z)")
    return label_use.sub(r"\1.\2", asm)
| |
| |
def transform_labels(asm):
    """
    Normalize local labels and drop the declarations that no jump targets.

    The text is first passed through normalize_labels(); the set of labels
    reported by find_used_labels() is then used to filter out every
    ``.L...:`` declaration line whose label is not in that set. All other
    lines are kept.

    Returns the filtered text, with each kept line terminated by a newline.
    """
    normalized = normalize_labels(asm)
    referenced = find_used_labels(normalized)
    decl_pattern = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for text in normalized.splitlines():
        decl = decl_pattern.match(text)
        # Only local-label declarations are candidates for removal.
        if decl is not None and decl.group(0) not in referenced:
            continue
        kept.append(text + "\n")
    return "".join(kept)
| |
| |
def is_identifier(tk):
    """
    Return True if `tk` looks like an identifier.

    The first character must be a letter or ``_``; every following
    character must be alphanumeric or ``_``. The empty string is not an
    identifier.
    """
    if not tk:
        return False
    head, tail = tk[0], tk[1:]
    if head != "_" and not head.isalpha():
        return False
    return all(ch == "_" or ch.isalnum() for ch in tail)
| |
| |
def process_identifiers(line):
    """
    process_identifiers - rewrite the identifiers in `line` so that names
    are consistent across platforms; specifically across ELF and MachO.
    MachO inserts an additional underscore at the beginning of names; this
    function removes it:

      * ``__Z...``  -> ``_Z...``
      * ``_name``   -> ``name`` (when the character after the underscore
        is a letter other than ``Z``)

    Everything else, including text between identifiers, is passed through
    unchanged. Returns the rewritten line.
    """

    def strip_prefix(token):
        # Separators and tokens that are not identifiers pass straight through.
        if not is_identifier(token):
            return token
        if token.startswith("__Z"):
            return token[1:]
        if (
            len(token) > 1
            and token[0] == "_"
            and token[1].isalpha()
            and token[1] != "Z"
        ):
            return token[1:]
        return token

    # The capturing group makes re.split keep the word tokens alongside the
    # separators, so joining the pieces reproduces the untouched text exactly.
    pieces = re.split(r"([a-zA-Z0-9_]+)", line)
    return "".join(strip_prefix(piece) for piece in pieces)
| |
| |
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines.

    The text is first run through transform_labels(). Each resulting line
    then has ``@GOTPCREL`` removed and is dropped if it matches any of the
    discard patterns (unless a keep pattern also matches; the keep list is
    currently empty). Surviving lines are passed through
    process_identifiers(), and a blank line is emitted before every
    function-style label except when nothing has been output yet.

    Returns the stripped text, one newline-terminated line per kept line.
    """
    # TODO: Add more things we want to remove
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(
            r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"
        ),  # global directive
        re.compile(
            r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"
        ),
    ]
    keep_regexes: list[re.Pattern] = []
    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")

    pieces = []
    for raw in transform_labels(asm).splitlines():
        # Remove Mach-O attribute
        text = raw.replace("@GOTPCREL", "")
        discarded = any(rx.match(text) is not None for rx in discard_regexes)
        rescued = any(rx.match(text) is not None for rx in keep_regexes)
        # A keep pattern overrides a discard pattern.
        if discarded and not rescued:
            continue
        # Separate functions with a blank line, but never lead with one.
        if pieces and fn_label_def.match(text):
            pieces.append("\n")
        pieces.append(process_identifiers(text))
        pieces.append("\n")
    return "".join(pieces)
| |
| |
def main():
    """
    Command-line entry point: strip an assembly file and write the result.

    Expects two positional arguments, the input assembly file and the
    output file. Any additional, unrecognized arguments are ignored. The
    input is read in full, passed through process_asm(), and the result is
    written to the output path.

    Exits with status 1 (after printing an error to stderr) if the input
    path is not an existing file.
    """
    parser = ArgumentParser(description="generate a stripped assembly file")
    parser.add_argument(
        "input",
        metavar="input",
        type=str,
        nargs=1,
        help="An input assembly file",
    )
    parser.add_argument(
        "out", metavar="output", type=str, nargs=1, help="The output file"
    )
    # parse_known_args (rather than parse_args) tolerates extra arguments;
    # they are deliberately discarded.
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists.
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(
            "ERROR: input file '%s' does not exist" % input_path,
            file=sys.stderr,
        )
        sys.exit(1)

    with open(input_path, "r") as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, "w") as f:
        f.write(new_contents)


if __name__ == "__main__":
    main()
| |
| # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 |
| # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off; |
| # kate: indent-mode python; remove-trailing-spaces modified; |