Various fixes and additions to creduce-clang-crash.py

Some more additions to the script, mainly reducing the clang arguments after
the creduce run by removing them one at a time and checking whether the crash
still reproduces. Other changes:

- omit the --crash flag passed to `not` when the failure is a "fatal error"
- fix stack trace parsing to take the first five functions from the top of
  the trace (after filtering common handlers) instead of the last five
- run creduce on a copy of the original file

Patch by Amy Huang!

Differential Revision: https://reviews.llvm.org/D59725

llvm-svn: 357290
This commit is contained in:
George Burgess IV 2019-03-29 17:50:43 +00:00
parent 0bc9f15ddd
commit 5456beb944
1 changed files with 313 additions and 150 deletions

View File

@ -1,8 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
"""Calls C-Reduce to create a minimal reproducer for clang crashes. """Calls C-Reduce to create a minimal reproducer for clang crashes.
Output files:
*.reduced.sh -- crash reproducer with minimal arguments
*.reduced.cpp -- the reduced file
*.test.sh -- interestingness test for C-Reduce
""" """
from argparse import ArgumentParser from __future__ import print_function
from argparse import ArgumentParser, RawTextHelpFormatter
import os import os
import re import re
import stat import stat
@ -15,10 +21,14 @@ import shutil
from distutils.spawn import find_executable from distutils.spawn import find_executable
verbose = False verbose = False
llvm_bin = None
creduce_cmd = None creduce_cmd = None
clang_cmd = None
not_cmd = None not_cmd = None
def verbose_print(*args, **kwargs):
if verbose:
print(*args, **kwargs)
def check_file(fname): def check_file(fname):
if not os.path.isfile(fname): if not os.path.isfile(fname):
sys.exit("ERROR: %s does not exist" % (fname)) sys.exit("ERROR: %s does not exist" % (fname))
@ -33,166 +43,339 @@ def check_cmd(cmd_name, cmd_dir, cmd_path=None):
cmd = find_executable(cmd_path) cmd = find_executable(cmd_path)
if cmd: if cmd:
return cmd return cmd
sys.exit("ERROR: executable %s not found" % (cmd_path)) sys.exit("ERROR: executable `%s` not found" % (cmd_path))
cmd = find_executable(cmd_name, path=cmd_dir) cmd = find_executable(cmd_name, path=cmd_dir)
if cmd: if cmd:
return cmd return cmd
sys.exit("ERROR: %s not found in %s" % (cmd_name, cmd_dir))
if not cmd_dir:
cmd_dir = "$PATH"
sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
def quote_cmd(cmd): def quote_cmd(cmd):
return ' '.join(arg if arg.startswith('$') else pipes.quote(arg) return ' '.join(pipes.quote(arg) for arg in cmd)
for arg in cmd)
def get_crash_cmd(crash_script): def write_to_script(text, filename):
with open(crash_script) as f: with open(filename, 'w') as f:
# Assume clang call is on the last line of the script f.write(text)
line = f.readlines()[-1] os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
cmd = shlex.split(line)
# Overwrite the script's clang with the user's clang path class Reduce(object):
new_clang = check_cmd('clang', llvm_bin) def __init__(self, crash_script, file_to_reduce):
cmd[0] = pipes.quote(new_clang) crash_script_name, crash_script_ext = os.path.splitext(crash_script)
return cmd file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
def has_expected_output(crash_cmd, expected_output): self.testfile = file_reduce_name + '.test.sh'
p = subprocess.Popen(crash_cmd, self.crash_script = crash_script_name + '.reduced' + crash_script_ext
stdout=subprocess.PIPE, self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
stderr=subprocess.STDOUT) shutil.copy(file_to_reduce, self.file_to_reduce)
crash_output, _ = p.communicate()
return all(msg in crash_output for msg in expected_output)
def get_expected_output(crash_cmd): self.clang = clang_cmd
p = subprocess.Popen(crash_cmd, self.clang_args = []
stdout=subprocess.PIPE, self.expected_output = []
stderr=subprocess.STDOUT) self.is_crash = True
crash_output, _ = p.communicate() self.creduce_flags = ["--tidy"]
# If there is an assertion failure, use that; self.read_clang_args(crash_script, file_to_reduce)
# otherwise use the last five stack trace functions self.read_expected_output()
assertion_re = r'Assertion `([^\']+)\' failed'
assertion_match = re.search(assertion_re, crash_output)
if assertion_match:
return [assertion_match.group(1)]
else:
stacktrace_re = r'#[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
matches = re.findall(stacktrace_re, crash_output)
return matches[-5:]
def write_interestingness_test(testfile, crash_cmd, expected_output, def get_crash_cmd(self, cmd=None, args=None, filename=None):
file_to_reduce): if not cmd:
filename = os.path.basename(file_to_reduce) cmd = self.clang
if filename not in crash_cmd: if not args:
sys.exit("ERROR: expected %s to be in the crash command" % filename) args = self.clang_args
if not filename:
filename = self.file_to_reduce
# Replace all instances of file_to_reduce with a command line variable return [cmd] + args + [filename]
output = ['#!/bin/bash',
'if [ -z "$1" ] ; then',
' f=%s' % (pipes.quote(filename)),
'else',
' f="$1"',
'fi']
cmd = ['$f' if s == filename else s for s in crash_cmd]
output.append('%s --crash %s >& t.log || exit 1' % (pipes.quote(not_cmd), def read_clang_args(self, crash_script, filename):
quote_cmd(cmd))) print("\nReading arguments from crash script...")
with open(crash_script) as f:
# Assume clang call is on the last line of the script
line = f.readlines()[-1]
cmd = shlex.split(line)
for msg in expected_output: # Remove clang and filename from the command
output.append('grep %s t.log || exit 1' % pipes.quote(msg)) # Assume the last occurrence of the filename is the clang input file
del cmd[0]
for i in range(len(cmd)-1, -1, -1):
if cmd[i] == filename:
del cmd[i]
break
self.clang_args = cmd
verbose_print("Clang arguments:", quote_cmd(self.clang_args))
with open(testfile, 'w') as f: def read_expected_output(self):
f.write('\n'.join(output)) print("\nGetting expected crash output...")
os.chmod(testfile, os.stat(testfile).st_mode | stat.S_IEXEC) p = subprocess.Popen(self.get_crash_cmd(),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
crash_output, _ = p.communicate()
result = []
def check_interestingness(testfile, file_to_reduce): # Remove color codes
testfile = os.path.abspath(testfile) ansi_escape = r'\x1b\[[0-?]*m'
crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))
# Check that the test considers the original file interesting # Look for specific error messages
with open(os.devnull, 'w') as devnull: regexes = [r"Assertion `(.+)' failed", # Linux assert()
returncode = subprocess.call(testfile, stdout=devnull) r"Assertion failed: (.+),", # FreeBSD/Mac assert()
if returncode: r"fatal error: backend error: (.+)",
sys.exit("The interestingness test does not pass for the original file.") r"LLVM ERROR: (.+)",
r"UNREACHABLE executed (at .+)?!",
r"LLVM IR generation of ceclaration '(.+)'",
r"Generating code for declaration '(.+)'",
r"\*\*\* Bad machine code: (.+) \*\*\*"]
for msg_re in regexes:
match = re.search(msg_re, crash_output)
if match:
msg = match.group(1)
result = [msg]
print("Found message:", msg)
# Check that an empty file is not interesting if "fatal error:" in msg_re:
_, empty_file = tempfile.mkstemp() self.is_crash = False
with open(os.devnull, 'w') as devnull: break
returncode = subprocess.call([testfile, empty_file], stdout=devnull)
os.remove(empty_file)
if not returncode:
sys.exit("The interestingness test passes for an empty file.")
def clang_preprocess(file_to_reduce, crash_cmd, expected_output): # If no message was found, use the top five stack trace functions,
_, tmpfile = tempfile.mkstemp() # ignoring some common functions
shutil.copy(file_to_reduce, tmpfile) # Five is a somewhat arbitrary number; the goal is to get a small number
# of identifying functions with some leeway for common functions
if not result:
stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
filters = ["PrintStackTraceSignalHandler",
"llvm::sys::RunSignalHandlers",
"SignalHandler", "__restore_rt", "gsignal", "abort"]
matches = re.findall(stacktrace_re, crash_output)
result = [x for x in matches if x and x.strip() not in filters][:5]
for msg in result:
print("Found stack trace function:", msg)
cmd = crash_cmd + ['-E', '-P'] if not result:
p = subprocess.Popen(cmd, print("ERROR: no crash was found")
stdout=subprocess.PIPE, print("The crash output was:\n========\n%s========" % crash_output)
stderr=subprocess.STDOUT) sys.exit(1)
preprocessed, _ = p.communicate()
with open(file_to_reduce, 'w') as f: self.expected_output = result
f.write(preprocessed)
if has_expected_output(crash_cmd, expected_output): def check_expected_output(self, args=None, filename=None):
if verbose: if not args:
print("Successfuly preprocessed with %s" % (quote_cmd(cmd))) args = self.clang_args
os.remove(tmpfile) if not filename:
else: filename = self.file_to_reduce
if verbose:
print("Failed to preprocess with %s" % (quote_cmd(cmd)))
shutil.move(tmpfile, file_to_reduce)
p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
crash_output, _ = p.communicate()
return all(msg in crash_output.decode('utf-8') for msg in
self.expected_output)
def filter_args(args, opts_startswith=[]): def write_interestingness_test(self):
result = [arg for arg in args if all(not arg.startswith(a) for a in print("\nCreating the interestingness test...")
opts_startswith)]
return result
def try_remove_args(cmd, expected_output, msg=None, extra_arg=None, **kwargs): crash_flag = "--crash" if self.is_crash else ""
new_cmd = filter_args(cmd, **kwargs)
if extra_arg and extra_arg not in new_cmd:
new_cmd.append(extra_arg)
if new_cmd != cmd and has_expected_output(new_cmd, expected_output):
if msg and verbose:
print(msg)
return new_cmd
return cmd
def simplify_crash_cmd(crash_cmd, expected_output): output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
new_cmd = try_remove_args(crash_cmd, expected_output, (pipes.quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))
msg="Removed debug info options",
opts_startswith=["-gcodeview", for msg in self.expected_output:
"-dwarf-column-info", output += 'grep %s t.log || exit 1\n' % pipes.quote(msg)
"-debug-info-kind=",
"-debugger-tuning=", write_to_script(output, self.testfile)
"-gdwarf"]) self.check_interestingness()
new_cmd = try_remove_args(new_cmd, expected_output,
msg="Replaced -W options with -w", def check_interestingness(self):
extra_arg='-w', testfile = os.path.abspath(self.testfile)
opts_startswith=["-W"])
new_cmd = try_remove_args(new_cmd, expected_output, # Check that the test considers the original file interesting
msg="Replaced optimization level with -O0", with open(os.devnull, 'w') as devnull:
extra_arg="-O0", returncode = subprocess.call(testfile, stdout=devnull)
opts_startswith=["-O"]) if returncode:
return new_cmd sys.exit("The interestingness test does not pass for the original file.")
# Check that an empty file is not interesting
# Instead of modifying the filename in the test file, just run the command
with tempfile.NamedTemporaryFile() as empty_file:
is_interesting = self.check_expected_output(filename=empty_file.name)
if is_interesting:
sys.exit("The interestingness test passes for an empty file.")
def clang_preprocess(self):
print("\nTrying to preprocess the source file...")
with tempfile.NamedTemporaryFile() as tmpfile:
cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
cmd_preprocess_no_lines = cmd_preprocess + ['-P']
try:
subprocess.check_call(cmd_preprocess_no_lines)
if self.check_expected_output(filename=tmpfile.name):
print("Successfully preprocessed with line markers removed")
shutil.copy(tmpfile.name, self.file_to_reduce)
else:
subprocess.check_call(cmd_preprocess)
if self.check_expected_output(filename=tmpfile.name):
print("Successfully preprocessed without removing line markers")
shutil.copy(tmpfile.name, self.file_to_reduce)
else:
print("No longer crashes after preprocessing -- "
"using original source")
except subprocess.CalledProcessError:
print("Preprocessing failed")
@staticmethod
def filter_args(args, opts_equal=[], opts_startswith=[],
opts_one_arg_startswith=[]):
result = []
skip_next = False
for arg in args:
if skip_next:
skip_next = False
continue
if any(arg == a for a in opts_equal):
continue
if any(arg.startswith(a) for a in opts_startswith):
continue
if any(arg.startswith(a) for a in opts_one_arg_startswith):
skip_next = True
continue
result.append(arg)
return result
def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
new_args = self.filter_args(args, **kwargs)
if extra_arg:
if extra_arg in new_args:
new_args.remove(extra_arg)
new_args.append(extra_arg)
if (new_args != args and
self.check_expected_output(args=new_args)):
if msg:
verbose_print(msg)
return new_args
return args
def try_remove_arg_by_index(self, args, index):
new_args = args[:index] + args[index+1:]
removed_arg = args[index]
# Heuristic for grouping arguments:
# remove next argument if it doesn't start with "-"
if index < len(new_args) and not new_args[index].startswith('-'):
del new_args[index]
removed_arg += ' ' + args[index+1]
if self.check_expected_output(args=new_args):
verbose_print("Removed", removed_arg)
return new_args, index
return args, index+1
def simplify_clang_args(self):
"""Simplify clang arguments before running C-Reduce to reduce the time the
interestingness test takes to run.
"""
print("\nSimplifying the clang command...")
# Remove some clang arguments to speed up the interestingness test
new_args = self.clang_args
new_args = self.try_remove_args(new_args,
msg="Removed debug info options",
opts_startswith=["-gcodeview",
"-debug-info-kind=",
"-debugger-tuning="])
# Not suppressing warnings (-w) sometimes prevents the crash from occurring
# after preprocessing
new_args = self.try_remove_args(new_args,
msg="Replaced -W options with -w",
extra_arg='-w',
opts_startswith=["-W"])
new_args = self.try_remove_args(new_args,
msg="Replaced optimization level with -O0",
extra_arg="-O0",
opts_startswith=["-O"])
# Try to remove compilation steps
new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
extra_arg="-emit-llvm")
new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
extra_arg="-fsyntax-only")
# Try to make implicit int an error for more sensible test output
new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
opts_equal=["-w"],
extra_arg="-Werror=implicit-int")
self.clang_args = new_args
verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))
def reduce_clang_args(self):
"""Minimize the clang arguments after running C-Reduce, to get the smallest
command that reproduces the crash on the reduced file.
"""
print("\nReducing the clang crash command...")
new_args = self.clang_args
# Remove some often occurring args
new_args = self.try_remove_args(new_args, msg="Removed -D options",
opts_startswith=["-D"])
new_args = self.try_remove_args(new_args, msg="Removed -D options",
opts_one_arg_startswith=["-D"])
new_args = self.try_remove_args(new_args, msg="Removed -I options",
opts_startswith=["-I"])
new_args = self.try_remove_args(new_args, msg="Removed -I options",
opts_one_arg_startswith=["-I"])
new_args = self.try_remove_args(new_args, msg="Removed -W options",
opts_startswith=["-W"])
# Remove other cases that aren't covered by the heuristic
new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
opts_one_arg_startswith=["-mllvm"])
i = 0
while i < len(new_args):
new_args, i = self.try_remove_arg_by_index(new_args, i)
self.clang_args = new_args
reduced_cmd = quote_cmd(self.get_crash_cmd())
write_to_script(reduced_cmd, self.crash_script)
print("Reduced command:", reduced_cmd)
def run_creduce(self):
print("\nRunning C-Reduce...")
try:
p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
[self.testfile, self.file_to_reduce])
p.communicate()
except KeyboardInterrupt:
# Hack to kill C-Reduce because it jumps into its own pgid
print('\n\nctrl-c detected, killed creduce')
p.kill()
def main(): def main():
global verbose global verbose
global llvm_bin
global creduce_cmd global creduce_cmd
global clang_cmd
global not_cmd global not_cmd
parser = ArgumentParser(description=__doc__) parser = ArgumentParser(description=__doc__,
formatter_class=RawTextHelpFormatter)
parser.add_argument('crash_script', type=str, nargs=1, parser.add_argument('crash_script', type=str, nargs=1,
help="Name of the script that generates the crash.") help="Name of the script that generates the crash.")
parser.add_argument('file_to_reduce', type=str, nargs=1, parser.add_argument('file_to_reduce', type=str, nargs=1,
help="Name of the file to be reduced.") help="Name of the file to be reduced.")
parser.add_argument('--llvm-bin', dest='llvm_bin', type=str, parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
required=True, help="Path to the LLVM bin directory.") help="Path to the LLVM bin directory.")
parser.add_argument('--llvm-not', dest='llvm_not', type=str, parser.add_argument('--llvm-not', dest='llvm_not', type=str,
help="The path to the `not` executable. " help="The path to the `not` executable. "
"By default uses the llvm-bin directory.") "By default uses the llvm-bin directory.")
parser.add_argument('--clang', dest='clang', type=str,
help="The path to the `clang` executable. "
"By default uses the llvm-bin directory.")
parser.add_argument('--creduce', dest='creduce', type=str, parser.add_argument('--creduce', dest='creduce', type=str,
help="The path to the `creduce` executable. " help="The path to the `creduce` executable. "
"Required if `creduce` is not in PATH environment.") "Required if `creduce` is not in PATH environment.")
@ -200,41 +383,21 @@ def main():
args = parser.parse_args() args = parser.parse_args()
verbose = args.verbose verbose = args.verbose
llvm_bin = os.path.abspath(args.llvm_bin) llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
creduce_cmd = check_cmd('creduce', None, args.creduce) creduce_cmd = check_cmd('creduce', None, args.creduce)
clang_cmd = check_cmd('clang', llvm_bin, args.clang)
not_cmd = check_cmd('not', llvm_bin, args.llvm_not) not_cmd = check_cmd('not', llvm_bin, args.llvm_not)
crash_script = check_file(args.crash_script[0]) crash_script = check_file(args.crash_script[0])
file_to_reduce = check_file(args.file_to_reduce[0]) file_to_reduce = check_file(args.file_to_reduce[0])
print("\nParsing the crash script and getting expected output...") r = Reduce(crash_script, file_to_reduce)
crash_cmd = get_crash_cmd(crash_script)
expected_output = get_expected_output(crash_cmd) r.simplify_clang_args()
if len(expected_output) < 1: r.write_interestingness_test()
sys.exit("ERROR: no crash was found") r.clang_preprocess()
r.run_creduce()
print("\nSimplifying the crash command...") r.reduce_clang_args()
crash_cmd = simplify_crash_cmd(crash_cmd, expected_output)
print("\nWriting interestingness test to file...")
testfile = os.path.splitext(file_to_reduce)[0] + '.test.sh'
write_interestingness_test(testfile, crash_cmd, expected_output,
file_to_reduce)
check_interestingness(testfile, file_to_reduce)
print("\nPreprocessing the file to reduce...")
clang_preprocess(file_to_reduce, crash_cmd, expected_output)
print("\nRunning C-Reduce...")
try:
p = subprocess.Popen([creduce_cmd, testfile, file_to_reduce])
p.communicate()
except KeyboardInterrupt:
# Hack to kill C-Reduce because it jumps into its own pgid
print('\n\nctrl-c detected, killed creduce')
p.kill()
# FIXME: reduce the clang crash command
if __name__ == '__main__': if __name__ == '__main__':
main() main()