forked from OSchip/llvm-project
[hwasan] Offline symbolization script.
Summary: A script to symbolize hwasan reports after the fact using unstripped binaries. Supports stack-based reports. Requires llvm-symbolizer (addr2line is not an option). Reviewers: pcc, hctim Subscribers: mgorny, #sanitizers, llvm-commits Tags: #sanitizers, #llvm Differential Revision: https://reviews.llvm.org/D71148
This commit is contained in:
parent
a0b025b8e7
commit
9ef451d1fd
|
@ -181,6 +181,8 @@ endforeach()
|
|||
|
||||
add_compiler_rt_resource_file(hwasan_blacklist hwasan_blacklist.txt hwasan)
|
||||
|
||||
add_subdirectory("scripts")
|
||||
|
||||
# if(COMPILER_RT_INCLUDE_TESTS)
|
||||
# add_subdirectory(tests)
|
||||
# endif()
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
add_compiler_rt_script(hwasan_symbolize)
|
||||
add_dependencies(hwasan hwasan_symbolize)
|
|
@ -0,0 +1,282 @@
|
|||
#!/usr/bin/env python
|
||||
#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
|
||||
#
|
||||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
# See https:#llvm.org/LICENSE.txt for license information.
|
||||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
#
|
||||
# HWAddressSanitizer offline symbolization script.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import string
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
last_access_address = None
|
||||
last_access_tag = None
|
||||
|
||||
class Symbolizer:
|
||||
def __init__(self, path, binary_prefixes, paths_to_cut):
|
||||
self.__pipe = None
|
||||
self.__path = path
|
||||
self.__binary_prefixes = binary_prefixes
|
||||
self.__paths_to_cut = paths_to_cut
|
||||
self.__log = False
|
||||
|
||||
def enable_logging(self, enable):
|
||||
self.__log = enable
|
||||
|
||||
def __open_pipe(self):
|
||||
if not self.__pipe:
|
||||
self.__pipe = subprocess.Popen([self.__path, "-inlining", "-functions"],
|
||||
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
|
||||
|
||||
class __EOF:
|
||||
pass
|
||||
|
||||
def __write(self, s):
|
||||
print >>self.__pipe.stdin, s
|
||||
if self.__log:
|
||||
print >>sys.stderr, ("#>> |%s|" % (s,))
|
||||
|
||||
def __read(self):
|
||||
s = self.__pipe.stdout.readline().rstrip()
|
||||
if self.__log:
|
||||
print >>sys.stderr, ("# << |%s|" % (s,))
|
||||
if s == '':
|
||||
raise Symbolizer.__EOF
|
||||
return s
|
||||
|
||||
def __process_source_path(self, file_name):
|
||||
for path_to_cut in self.__paths_to_cut:
|
||||
file_name = re.sub(".*" + path_to_cut, "", file_name)
|
||||
file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
|
||||
file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
|
||||
file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
|
||||
return file_name
|
||||
|
||||
def __process_binary_name(self, name):
|
||||
if name.startswith('/'):
|
||||
name = name[1:]
|
||||
for p in self.__binary_prefixes:
|
||||
full_path = os.path.join(p, name)
|
||||
if os.path.exists(full_path):
|
||||
return full_path
|
||||
# Try stripping extra path components as the last resort.
|
||||
for p in self.__binary_prefixes:
|
||||
full_path = os.path.join(p, os.path.basename(name))
|
||||
if os.path.exists(full_path):
|
||||
return full_path
|
||||
print >>sys.stderr, "Could not find symbols for", name
|
||||
return None
|
||||
|
||||
def iter_locals(self, binary, addr):
|
||||
self.__open_pipe()
|
||||
p = self.__pipe
|
||||
binary = self.__process_binary_name(binary)
|
||||
if not binary:
|
||||
return
|
||||
self.__write("FRAME %s %s" % (binary, addr))
|
||||
try:
|
||||
while True:
|
||||
function_name = self.__read()
|
||||
local_name = self.__read()
|
||||
file_line = self.__read()
|
||||
extra = self.__read().split()
|
||||
|
||||
file_line = self.__process_source_path(file_line)
|
||||
offset = None if extra[0] == '??' else int(extra[0])
|
||||
size = None if extra[1] == '??' else int(extra[1])
|
||||
tag_offset = None if extra[2] == '??' else int(extra[2])
|
||||
yield (function_name, file_line, local_name, offset, size, tag_offset)
|
||||
except Symbolizer.__EOF:
|
||||
pass
|
||||
|
||||
def iter_call_stack(self, binary, addr):
|
||||
self.__open_pipe()
|
||||
p = self.__pipe
|
||||
binary = self.__process_binary_name(binary)
|
||||
if not binary:
|
||||
return
|
||||
self.__write("CODE %s %s" % (binary, addr))
|
||||
try:
|
||||
while True:
|
||||
function_name = self.__read()
|
||||
file_line = self.__read()
|
||||
file_line = self.__process_source_path(file_line)
|
||||
yield (function_name, file_line)
|
||||
except Symbolizer.__EOF:
|
||||
pass
|
||||
|
||||
def symbolize_line(line, symbolizer_path):
|
||||
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
|
||||
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
|
||||
if match:
|
||||
frameno = match.group(2)
|
||||
binary = match.group(5)
|
||||
addr = int(match.group(6), 16)
|
||||
|
||||
frames = list(symbolizer.iter_call_stack(binary, addr))
|
||||
|
||||
if len(frames) > 0:
|
||||
print "%s#%s%s%s in %s" % (match.group(1).encode('utf-8'), match.group(2).encode('utf-8'),
|
||||
match.group(3).encode('utf-8'), frames[0][0], frames[0][1])
|
||||
for i in range(1, len(frames)):
|
||||
space1 = ' ' * match.end(1)
|
||||
space2 = ' ' * (match.start(4) - match.end(1) - 2)
|
||||
print "%s->%s%s in %s" % (space1, space2, frames[i][0], frames[i][1])
|
||||
else:
|
||||
print line.rstrip().encode('utf-8')
|
||||
else:
|
||||
print line.rstrip().encode('utf-8')
|
||||
|
||||
def save_access_address(line):
|
||||
global last_access_address, last_access_tag
|
||||
match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
|
||||
if match:
|
||||
last_access_address = int(match.group(2), 16)
|
||||
match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]+ tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
|
||||
if match:
|
||||
last_access_tag = int(match.group(2), 16)
|
||||
|
||||
def process_stack_history(line, symbolizer, ignore_tags=False):
|
||||
if last_access_address is None or last_access_tag is None:
|
||||
return
|
||||
if re.match(r'Previously allocated frames:', line, re.UNICODE):
|
||||
return True
|
||||
pc_mask = (1 << 48) - 1
|
||||
fp_mask = (1 << 20) - 1
|
||||
# record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
|
||||
match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
|
||||
if match:
|
||||
record_addr = int(match.group(2), 16)
|
||||
record = int(match.group(3), 16)
|
||||
binary = match.group(4)
|
||||
addr = int(match.group(5), 16)
|
||||
base_tag = (record_addr >> 3) & 0xFF
|
||||
fp = (record >> 48) << 4
|
||||
pc = record & pc_mask
|
||||
|
||||
for local in symbolizer.iter_locals(binary, addr):
|
||||
frame_offset = local[3]
|
||||
size = local[4]
|
||||
if frame_offset is None or size is None:
|
||||
continue
|
||||
obj_offset = (last_access_address - fp - frame_offset) & fp_mask
|
||||
if obj_offset >= size:
|
||||
continue
|
||||
tag_offset = local[5]
|
||||
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
|
||||
continue
|
||||
print ''
|
||||
print 'Potentially referenced stack object:'
|
||||
print ' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0])
|
||||
print ' at %s' % (local[1],)
|
||||
return True
|
||||
return False
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-d', action='store_true')
|
||||
parser.add_argument('-v', action='store_true')
|
||||
parser.add_argument('--ignore-tags', action='store_true')
|
||||
parser.add_argument('--symbols', action='append')
|
||||
parser.add_argument('--source', action='append')
|
||||
parser.add_argument('--symbolizer')
|
||||
parser.add_argument('args', nargs=argparse.REMAINDER)
|
||||
args = parser.parse_args()
|
||||
|
||||
# Unstripped binaries location.
|
||||
binary_prefixes = args.symbols or []
|
||||
if not binary_prefixes:
|
||||
if 'ANDROID_PRODUCT_OUT' in os.environ:
|
||||
product_out = os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols')
|
||||
binary_prefixes.append(product_out)
|
||||
|
||||
for p in binary_prefixes:
|
||||
if not os.path.isdir(p):
|
||||
print >>sys.stderr, "Symbols path does not exist or is not a directory:", p
|
||||
sys.exit(1)
|
||||
|
||||
# Source location.
|
||||
paths_to_cut = args.source or []
|
||||
if not paths_to_cut:
|
||||
paths_to_cut.append(os.getcwd() + '/')
|
||||
if 'ANDROID_BUILD_TOP' in os.environ:
|
||||
paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')
|
||||
|
||||
# llvm-symbolizer binary.
|
||||
# 1. --symbolizer flag
|
||||
# 2. environment variable
|
||||
# 3. unsuffixed binary in the current directory
|
||||
# 4. if inside Android platform, prebuilt binary at a known path
|
||||
# 5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
|
||||
# highest available version in $PATH
|
||||
symbolizer_path = args.symbolizer
|
||||
if not symbolizer_path:
|
||||
if 'LLVM_SYMBOLIZER_PATH' in os.environ:
|
||||
symbolizer_path = os.environ['LLVM_SYMBOLIZER_PATH']
|
||||
elif 'HWASAN_SYMBOLIZER_PATH' in os.environ:
|
||||
symbolizer_path = os.environ['HWASAN_SYMBOLIZER_PATH']
|
||||
|
||||
if not symbolizer_path:
|
||||
s = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
|
||||
if os.path.exists(s):
|
||||
symbolizer_path = s
|
||||
|
||||
if not symbolizer_path:
|
||||
if 'ANDROID_BUILD_TOP' in os.environ:
|
||||
s = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
|
||||
if os.path.exists(s):
|
||||
symbolizer_path = s
|
||||
|
||||
if not symbolizer_path:
|
||||
for path in os.environ["PATH"].split(os.pathsep):
|
||||
p = os.path.join(path, 'llvm-symbolizer')
|
||||
if os.path.exists(p):
|
||||
symbolizer_path = p
|
||||
break
|
||||
|
||||
def extract_version(s):
|
||||
idx = s.rfind('-')
|
||||
if idx == -1:
|
||||
return 0
|
||||
x = float(s[idx + 1:])
|
||||
return x
|
||||
|
||||
if not symbolizer_path:
|
||||
for path in os.environ["PATH"].split(os.pathsep):
|
||||
candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
|
||||
if len(candidates) > 0:
|
||||
candidates.sort(key = extract_version, reverse = True)
|
||||
symbolizer_path = candidates[0]
|
||||
break
|
||||
|
||||
if not os.path.exists(symbolizer_path):
|
||||
print >>sys.stderr, "Symbolizer path does not exist:", symbolizer_path
|
||||
sys.exit(1)
|
||||
|
||||
if args.v:
|
||||
print "Looking for symbols in:"
|
||||
for s in binary_prefixes:
|
||||
print " %s" % (s,)
|
||||
print "Stripping source path prefixes:"
|
||||
for s in paths_to_cut:
|
||||
print " %s" % (s,)
|
||||
print "Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,)
|
||||
print
|
||||
|
||||
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
|
||||
symbolizer.enable_logging(args.d)
|
||||
|
||||
for line in sys.stdin:
|
||||
line = line.decode('utf-8')
|
||||
save_access_address(line)
|
||||
if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
|
||||
continue
|
||||
symbolize_line(line, symbolizer_path)
|
Loading…
Reference in New Issue