diff options
Diffstat (limited to 'bolt/utils/llvm-bolt-wrapper.py')
-rwxr-xr-x | bolt/utils/llvm-bolt-wrapper.py | 247 |
1 files changed, 150 insertions, 97 deletions
diff --git a/bolt/utils/llvm-bolt-wrapper.py b/bolt/utils/llvm-bolt-wrapper.py index 901304d..cd48204 100755 --- a/bolt/utils/llvm-bolt-wrapper.py +++ b/bolt/utils/llvm-bolt-wrapper.py @@ -41,9 +41,10 @@ from textwrap import dedent # # optional, defaults to timing.log in CWD # timing_file = timing1.log + def read_cfg(): src_dir = os.path.dirname(os.path.abspath(__file__)) - cfg = configparser.ConfigParser(allow_no_value = True) + cfg = configparser.ConfigParser(allow_no_value=True) cfgs = cfg.read("llvm-bolt-wrapper.ini") if not cfgs: cfgs = cfg.read(os.path.join(src_dir, "llvm-bolt-wrapper.ini")) @@ -51,69 +52,72 @@ def read_cfg(): def get_cfg(key): # if key is not present in config, assume False - if key not in cfg['config']: + if key not in cfg["config"]: return False # if key is present, but has no value, assume True - if not cfg['config'][key]: + if not cfg["config"][key]: return True # if key has associated value, interpret the value - return cfg['config'].getboolean(key) + return cfg["config"].getboolean(key) d = { # BOLT binary locations - 'BASE_BOLT': cfg['config']['base_bolt'], - 'CMP_BOLT': cfg['config']['cmp_bolt'], + "BASE_BOLT": cfg["config"]["base_bolt"], + "CMP_BOLT": cfg["config"]["cmp_bolt"], # optional - 'VERBOSE': get_cfg('verbose'), - 'KEEP_TMP': get_cfg('keep_tmp'), - 'NO_MINIMIZE': get_cfg('no_minimize'), - 'RUN_SEQUENTIALLY': get_cfg('run_sequentially'), - 'COMPARE_OUTPUT': get_cfg('compare_output'), - 'SKIP_BINARY_CMP': get_cfg('skip_binary_cmp'), - 'TIMING_FILE': cfg['config'].get('timing_file', 'timing.log'), + "VERBOSE": get_cfg("verbose"), + "KEEP_TMP": get_cfg("keep_tmp"), + "NO_MINIMIZE": get_cfg("no_minimize"), + "RUN_SEQUENTIALLY": get_cfg("run_sequentially"), + "COMPARE_OUTPUT": get_cfg("compare_output"), + "SKIP_BINARY_CMP": get_cfg("skip_binary_cmp"), + "TIMING_FILE": cfg["config"].get("timing_file", "timing.log"), } - if d['VERBOSE']: + if d["VERBOSE"]: print(f"Using config {os.path.abspath(cfgs[0])}") return SimpleNamespace(**d) + # perf2bolt mode -PERF2BOLT_MODE = ['-aggregate-only', '-ignore-build-id'] +PERF2BOLT_MODE = ["-aggregate-only", "-ignore-build-id"] # boltdiff mode -BOLTDIFF_MODE = ['-diff-only', '-o', '/dev/null'] +BOLTDIFF_MODE = ["-diff-only", "-o", "/dev/null"] # options to suppress binary differences as much as possible -MINIMIZE_DIFFS = ['-bolt-info=0'] +MINIMIZE_DIFFS = ["-bolt-info=0"] # bolt output options that need to be intercepted BOLT_OUTPUT_OPTS = { - '-o': 'BOLT output binary', - '-w': 'BOLT recorded profile', + "-o": "BOLT output binary", + "-w": "BOLT recorded profile", } # regex patterns to exclude the line from log comparison SKIP_MATCH = [ - 'BOLT-INFO: BOLT version', - r'^Args: ', - r'^BOLT-DEBUG:', - r'BOLT-INFO:.*data.*output data', - 'WARNING: reading perf data directly', + "BOLT-INFO: BOLT version", + r"^Args: ", + r"^BOLT-DEBUG:", + r"BOLT-INFO:.*data.*output data", + "WARNING: reading perf data directly", ] + def run_cmd(cmd, out_f, cfg): if cfg.VERBOSE: - print(' '.join(cmd)) + print(" ".join(cmd)) return subprocess.Popen(cmd, stdout=out_f, stderr=subprocess.STDOUT) + def run_bolt(bolt_path, bolt_args, out_f, cfg): - p2b = os.path.basename(sys.argv[0]) == 'perf2bolt' # perf2bolt mode - bd = os.path.basename(sys.argv[0]) == 'llvm-boltdiff' # boltdiff mode - hm = sys.argv[1] == 'heatmap' # heatmap mode - cmd = ['/usr/bin/time', '-f', '%e %M', bolt_path] + bolt_args + p2b = os.path.basename(sys.argv[0]) == "perf2bolt" # perf2bolt mode + bd = os.path.basename(sys.argv[0]) == "llvm-boltdiff" # boltdiff mode + hm = sys.argv[1] == "heatmap" # heatmap mode + cmd = ["/usr/bin/time", "-f", "%e %M", bolt_path] + bolt_args if p2b: # -ignore-build-id can occur at most once, hence remove it from cmd - if '-ignore-build-id' in cmd: - cmd.remove('-ignore-build-id') + if "-ignore-build-id" in cmd: + cmd.remove("-ignore-build-id") cmd += PERF2BOLT_MODE elif bd: cmd += BOLTDIFF_MODE @@ -121,55 +125,65 @@ def run_bolt(bolt_path, bolt_args, out_f, cfg): cmd += MINIMIZE_DIFFS return run_cmd(cmd, out_f, cfg) + def prepend_dash(args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]: - ''' + """ Accepts parsed arguments and returns flat list with dash prepended to the option. Example: Namespace(o='test.tmp') -> ['-o', 'test.tmp'] - ''' - dashed = [('-'+key,value) for (key,value) in args.items()] + """ + dashed = [("-" + key, value) for (key, value) in args.items()] flattened = list(sum(dashed, ())) return flattened + def replace_cmp_path(tmp: AnyStr, args: Mapping[AnyStr, AnyStr]) -> Sequence[AnyStr]: - ''' + """ Keeps file names, but replaces the path to a temp folder. Example: Namespace(o='abc/test.tmp') -> Namespace(o='/tmp/tmpf9un/test.tmp') Except preserve /dev/null. - ''' - replace_path = lambda x: os.path.join(tmp, os.path.basename(x)) if x != '/dev/null' else '/dev/null' + """ + replace_path = ( + lambda x: os.path.join(tmp, os.path.basename(x)) + if x != "/dev/null" + else "/dev/null" + ) new_args = {key: replace_path(value) for key, value in args.items()} return prepend_dash(new_args) + def preprocess_args(args: argparse.Namespace) -> Mapping[AnyStr, AnyStr]: - ''' + """ Drop options that weren't parsed (e.g. -w), convert to a dict - ''' + """ return {key: value for key, value in vars(args).items() if value} -def write_to(txt, filename, mode='w'): + +def write_to(txt, filename, mode="w"): with open(filename, mode) as f: f.write(txt) + def wait(proc, fdesc): proc.wait() fdesc.close() return open(fdesc.name) + def compare_logs(main, cmp, skip_begin=0, skip_end=0, str_input=True): - ''' + """ Compares logs but allows for certain lines to be excluded from comparison. If str_input is True (default), the input it assumed to be a string, which is split into lines. Otherwise the input is assumed to be a file. Returns None on success, mismatch otherwise. - ''' + """ main_inp = main.splitlines() if str_input else main.readlines() cmp_inp = cmp.splitlines() if str_input else cmp.readlines() # rewind logs after consumption if not str_input: main.seek(0) cmp.seek(0) - for lhs, rhs in list(zip(main_inp, cmp_inp))[skip_begin:-skip_end or None]: + for lhs, rhs in list(zip(main_inp, cmp_inp))[skip_begin : -skip_end or None]: if lhs != rhs: # check skip patterns for skip in SKIP_MATCH: @@ -181,52 +195,59 @@ def compare_logs(main, cmp, skip_begin=0, skip_end=0, str_input=True): return (lhs, rhs) return None + def fmt_cmp(cmp_tuple): if not cmp_tuple: - return '' - return f'main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n' + return "" + return f"main:\n{cmp_tuple[0]}\ncmp:\n{cmp_tuple[1]}\n" + def compare_with(lhs, rhs, cmd, skip_begin=0, skip_end=0): - ''' + """ Runs cmd on both lhs and rhs and compares stdout. Returns tuple (mismatch, lhs_stdout): - if stdout matches between two files, mismatch is None, - otherwise mismatch is a tuple of mismatching lines. - ''' - run = lambda binary: subprocess.run(cmd.split() + [binary], - text=True, check=True, - capture_output=True).stdout + """ + run = lambda binary: subprocess.run( + cmd.split() + [binary], text=True, check=True, capture_output=True + ).stdout run_lhs = run(lhs) run_rhs = run(rhs) cmp = compare_logs(run_lhs, run_rhs, skip_begin, skip_end) return cmp, run_lhs + def parse_cmp_offset(cmp_out): - ''' + """ Extracts byte number from cmp output: file1 file2 differ: byte X, line Y - ''' + """ # NOTE: cmp counts bytes starting from 1! - return int(re.search(r'byte (\d+),', cmp_out).groups()[0]) - 1 + return int(re.search(r"byte (\d+),", cmp_out).groups()[0]) - 1 + def report_real_time(binary, main_err, cmp_err, cfg): - ''' + """ Extracts real time from stderr and appends it to TIMING FILE it as csv: "output binary; base bolt; cmp bolt" - ''' + """ + def get_real_from_stderr(logline): - return '; '.join(logline.split()) + return "; ".join(logline.split()) + for line in main_err: pass main = get_real_from_stderr(line) for line in cmp_err: pass cmp = get_real_from_stderr(line) - write_to(f"{binary}; {main}; {cmp}\n", cfg.TIMING_FILE, 'a') + write_to(f"{binary}; {main}; {cmp}\n", cfg.TIMING_FILE, "a") # rewind logs after consumption main_err.seek(0) cmp_err.seek(0) + def clean_exit(tmp, out, exitcode, cfg): # temp files are only cleaned on success if not cfg.KEEP_TMP: @@ -236,8 +257,9 @@ def clean_exit(tmp, out, exitcode, cfg): shutil.copyfileobj(out, sys.stdout) sys.exit(exitcode) + def find_section(offset, readelf_hdr): - hdr = readelf_hdr.split('\n') + hdr = readelf_hdr.split("\n") section = None # extract sections table (parse objdump -hw output) for line in hdr[5:-1]: @@ -247,7 +269,7 @@ def find_section(offset, readelf_hdr): # section size size = int(cols[2], 16) if offset >= file_offset and offset < file_offset + size: - if sys.stdout.isatty(): # terminal supports colors + if sys.stdout.isatty(): # terminal supports colors print(f"\033[1m{line}\033[0m") else: print(f">{line}") @@ -256,34 +278,57 @@ def find_section(offset, readelf_hdr): print(line) return section + def main_config_generator(): parser = argparse.ArgumentParser() - parser.add_argument('base_bolt', help='Full path to base llvm-bolt binary') - parser.add_argument('cmp_bolt', help='Full path to cmp llvm-bolt binary') - parser.add_argument('--verbose', action='store_true', - help='Print subprocess invocation cmdline (default False)') - parser.add_argument('--keep_tmp', action='store_true', - help = 'Preserve tmp folder on a clean exit ' - '(tmp directory is preserved on crash by default)') - parser.add_argument('--no_minimize', action='store_true', - help=f'Do not add `{MINIMIZE_DIFFS}` that is used ' - 'by default to reduce binary differences') - parser.add_argument('--run_sequentially', action='store_true', - help='Run both binaries sequentially (default ' - 'in parallel). Use for timing comparison') - parser.add_argument('--compare_output', action='store_true', - help = 'Compare bolt stdout/stderr (disabled by default)') - parser.add_argument('--skip_binary_cmp', action='store_true', - help = 'Disable output comparison') - parser.add_argument('--timing_file', help = 'Override path to timing log ' - 'file (default `timing.log` in CWD)') + parser.add_argument("base_bolt", help="Full path to base llvm-bolt binary") + parser.add_argument("cmp_bolt", help="Full path to cmp llvm-bolt binary") + parser.add_argument( + "--verbose", + action="store_true", + help="Print subprocess invocation cmdline (default False)", + ) + parser.add_argument( + "--keep_tmp", + action="store_true", + help="Preserve tmp folder on a clean exit " + "(tmp directory is preserved on crash by default)", + ) + parser.add_argument( + "--no_minimize", + action="store_true", + help=f"Do not add `{MINIMIZE_DIFFS}` that is used " + "by default to reduce binary differences", + ) + parser.add_argument( + "--run_sequentially", + action="store_true", + help="Run both binaries sequentially (default " + "in parallel). Use for timing comparison", + ) + parser.add_argument( + "--compare_output", + action="store_true", + help="Compare bolt stdout/stderr (disabled by default)", + ) + parser.add_argument( + "--skip_binary_cmp", action="store_true", help="Disable output comparison" + ) + parser.add_argument( + "--timing_file", + help="Override path to timing log " "file (default `timing.log` in CWD)", + ) args = parser.parse_args() - print(dedent(f'''\ + print( + dedent( + f"""\ [config] # mandatory base_bolt = {args.base_bolt} - cmp_bolt = {args.cmp_bolt}''')) + cmp_bolt = {args.cmp_bolt}""" + ) + ) del args.base_bolt del args.cmp_bolt d = vars(args) @@ -293,6 +338,7 @@ def main_config_generator(): if value: print(key) + def main(): cfg = read_cfg() # intercept output arguments @@ -309,8 +355,8 @@ def main(): args = prepend_dash(args) # run both BOLT binaries - main_f = open(os.path.join(tmp, 'main_bolt.stdout'), 'w') - cmp_f = open(os.path.join(tmp, 'cmp_bolt.stdout'), 'w') + main_f = open(os.path.join(tmp, "main_bolt.stdout"), "w") + cmp_f = open(os.path.join(tmp, "cmp_bolt.stdout"), "w") main_bolt = run_bolt(cfg.BASE_BOLT, unknownargs + args, main_f, cfg) if cfg.RUN_SEQUENTIALLY: main_out = wait(main_bolt, main_f) @@ -330,22 +376,26 @@ def main(): cfg.SKIP_BINARY_CMP = True # compare logs, skip_end=1 skips the line with time - out = compare_logs(main_out, cmp_out, skip_end=1, str_input=False) if cfg.COMPARE_OUTPUT else None + out = ( + compare_logs(main_out, cmp_out, skip_end=1, str_input=False) + if cfg.COMPARE_OUTPUT + else None + ) if out: print(tmp) print(fmt_cmp(out)) - write_to(fmt_cmp(out), os.path.join(tmp, 'summary.txt')) + write_to(fmt_cmp(out), os.path.join(tmp, "summary.txt")) exit("logs mismatch") - if os.path.basename(sys.argv[0]) == 'llvm-boltdiff': # boltdiff mode + if os.path.basename(sys.argv[0]) == "llvm-boltdiff": # boltdiff mode # no output binary to compare, so just exit clean_exit(tmp, main_out, main_bolt.returncode, cfg) # compare binaries (using cmp) - main_binary = args[args.index('-o')+1] - cmp_binary = cmp_args[cmp_args.index('-o')+1] - if main_binary == '/dev/null': - assert cmp_binary == '/dev/null' + main_binary = args[args.index("-o") + 1] + cmp_binary = cmp_args[cmp_args.index("-o") + 1] + if main_binary == "/dev/null": + assert cmp_binary == "/dev/null" cfg.SKIP_BINARY_CMP = True # report binary timing as csv: output binary; base bolt real; cmp bolt real @@ -368,23 +418,25 @@ def main(): assert not main_exists exit(f"{main_binary} doesn't exist") - cmp_proc = subprocess.run(['cmp', '-b', main_binary, cmp_binary], - capture_output=True, text=True) + cmp_proc = subprocess.run( + ["cmp", "-b", main_binary, cmp_binary], capture_output=True, text=True + ) if cmp_proc.returncode: # check if output is an ELF file (magic bytes) - with open(main_binary, 'rb') as f: + with open(main_binary, "rb") as f: magic = f.read(4) - if magic != b'\x7fELF': + if magic != b"\x7fELF": exit("output mismatch") # check if ELF headers match - mismatch, _ = compare_with(main_binary, cmp_binary, 'readelf -We') + mismatch, _ = compare_with(main_binary, cmp_binary, "readelf -We") if mismatch: print(fmt_cmp(mismatch)) - write_to(fmt_cmp(mismatch), os.path.join(tmp, 'headers.txt')) + write_to(fmt_cmp(mismatch), os.path.join(tmp, "headers.txt")) exit("headers mismatch") # if headers match, compare sections (skip line with filename) - mismatch, hdr = compare_with(main_binary, cmp_binary, 'objdump -hw', - skip_begin=2) + mismatch, hdr = compare_with( + main_binary, cmp_binary, "objdump -hw", skip_begin=2 + ) assert not mismatch # check which section has the first mismatch mismatch_offset = parse_cmp_offset(cmp_proc.stdout) @@ -393,6 +445,7 @@ def main(): clean_exit(tmp, main_out, main_bolt.returncode, cfg) + if __name__ == "__main__": # config generator mode if the script is launched as is if os.path.basename(__file__) == "llvm-bolt-wrapper.py": |