#!/usr/bin/env python3
"""Calls C-Reduce to create a minimal reproducer for clang crashes.

Output files:
  *.reduced.sh -- crash reproducer with minimal arguments
  *.reduced.cpp -- the reduced file
  *.test.sh -- interestingness test for C-Reduce
"""

from __future__ import print_function
from argparse import ArgumentParser, RawTextHelpFormatter
import os
import re
import shutil
import stat
import sys
import subprocess
import shlex
import tempfile
import multiprocessing

# Module-level configuration, filled in by main() before a Reduce is built.
verbose = False
creduce_cmd = None
clang_cmd = None


def verbose_print(*args, **kwargs):
    """Forward to print() only when the global `verbose` flag is set."""
    if verbose:
        print(*args, **kwargs)


def check_file(fname):
    """Return the normalized path of `fname`, exiting if it is not a file."""
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    return fname


def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or absolute path to cmd_dir/cmd_name.

    Exits the program with an error message when no executable is found.
    When cmd_dir is None, cmd_name is looked up in $PATH.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = shutil.which(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = shutil.which(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))


def quote_cmd(cmd):
    """Return the argument list `cmd` as one shell-quoted command string."""
    # shlex.quote replaces pipes.quote; the pipes module is gone in Python 3.13.
    return " ".join(shlex.quote(arg) for arg in cmd)


def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-executable bit."""
    with open(filename, "w") as f:
        f.write(text)
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)


class Reduce(object):
    """Drives the reduction of one clang crash.

    Construction copies the input file to `<name>.reduced<ext>`, reads the
    clang arguments from the crash script and runs clang once to record the
    output that identifies the crash.
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        # All later work happens on the copy; the original file is left alone.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Both flags go in one list; assigning them separately dropped --tidy.
        self.creduce_flags = ["--tidy", "--n", str(core_number)]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the clang invocation as an argument list.

        Each parameter left as None falls back to the value stored on the
        instance. An explicit empty `args` list is honoured, so a command
        with every argument removed can actually be tested.
        """
        if cmd is None:
            cmd = self.clang
        if args is None:
            args = self.clang_args
        if filename is None:
            filename = self.file_to_reduce

        return [cmd] + list(args) + [filename]

    def _run_crash_cmd(self, args=None, filename=None):
        """Run the crash command and return its combined stdout/stderr text."""
        p = subprocess.Popen(
            self.get_crash_cmd(args=args, filename=filename),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        crash_output, _ = p.communicate()
        # Diagnostics may echo arbitrary source bytes; never fail on decoding.
        return crash_output.decode("utf-8", errors="replace")

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from the crash script.

        The first non-comment line is taken as the clang call. The leading
        executable and the last occurrence of `filename` are removed.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            cmd = []
            for line in f:
                if not line.lstrip().startswith("#"):
                    cmd = shlex.split(line)
                    break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash once and record the strings that identify it.

        A known error message is preferred; otherwise up to five stack trace
        function names are used. Exits if neither is found.
        """
        print("\nGetting expected crash output...")
        crash_output = self._run_crash_cmd()

        result = []

        # Remove color codes
        ansi_escape = r"\x1b\[[0-?]*m"
        crash_output = re.sub(ansi_escape, "", crash_output)

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still shows every expected string.

        `args` and `filename` default to the instance's current values when
        None; an explicit empty `args` list runs clang with no arguments.
        """
        crash_output = self._run_crash_cmd(args=args, filename=filename)
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the C-Reduce interestingness script and sanity-check it."""
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        # The script fails if clang succeeds, or if any expected string is
        # missing from the captured log.
        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (
            disable_symbolization,
            quote_cmd(self.get_crash_cmd()),
        )

        for msg in self.expected_output:
            output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test accepts the input file and rejects an empty one."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, "w") as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form if it still crashes.

        Preprocessing without line markers (-P) is tried first, then with
        them. The file is left untouched if neither result reproduces the
        crash or if preprocessing itself fails.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=None, opts_startswith=None, opts_one_arg_startswith=None
    ):
        """Return a copy of `args` with the matching options removed.

        opts_equal: options dropped on an exact match.
        opts_startswith: options dropped when they start with a given prefix.
        opts_one_arg_startswith: like opts_startswith, but the argument that
            follows the option is dropped as well.
        """
        opts_equal = opts_equal or ()
        opts_startswith = opts_startswith or ()
        opts_one_arg_startswith = opts_one_arg_startswith or ()

        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return `args` filtered via filter_args(**kwargs) if it still crashes.

        When `extra_arg` is given it is moved to (or added at) the end of the
        candidate list. The original `args` is returned when the candidate is
        unchanged or no longer reproduces the crash.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at `index`, plus its value if it has one.

        Returns (new_args, index) when the crash still reproduces, so the
        caller re-examines the same position; otherwise (args, index + 1).
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -D options", opts_one_arg_startswith=["-D"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -I options", opts_one_arg_startswith=["-I"]
        )
        new_args = self.try_remove_args(
            new_args, msg="Removed -W options", opts_startswith=["-W"]
        )

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(
            new_args, msg="Removed -mllvm", opts_one_arg_startswith=["-mllvm"]
        )

        # Finally try every remaining argument one at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the file to reduce, using the interestingness test."""
        print("\nRunning C-Reduce...")
        # Start the process outside the try block so `p` is always bound when
        # the KeyboardInterrupt handler runs.
        p = subprocess.Popen(
            [creduce_cmd] + self.creduce_flags + [self.testfile, self.file_to_reduce]
        )
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            p.kill()
            # Reap the killed child so it does not linger as a zombie.
            p.wait()


def main():
    """Parse the command line and run the full reduction pipeline."""
    global verbose
    global creduce_cmd
    global clang_cmd

    parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "crash_script",
        type=str,
        nargs=1,
        help="Name of the script that generates the crash.",
    )
    parser.add_argument(
        "file_to_reduce", type=str, nargs=1, help="Name of the file to be reduced."
    )
    parser.add_argument(
        "--llvm-bin", dest="llvm_bin", type=str, help="Path to the LLVM bin directory."
    )
    parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    parser.add_argument(
        "--n",
        dest="core_number",
        type=int,
        default=max(4, multiprocessing.cpu_count() // 2),
        help="Number of cores to use.",
    )
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd("creduce", None, args.creduce)
    clang_cmd = check_cmd("clang", llvm_bin, args.clang)
    core_number = args.core_number

    # nargs=1 makes argparse store each positional as a one-element list.
    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    r = Reduce(crash_script, file_to_reduce, core_number)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()


if __name__ == "__main__":
    main()