forked from OSchip/llvm-project
[HWASan] allow symbolizer script to index binaries by build id.
Tested on an example callstack with misplaced binaries from Android. Tested Regex against callstack without Build ID to confirm it still works. Reviewed By: eugenis Differential Revision: https://reviews.llvm.org/D123437
This commit is contained in:
parent
06285fc9fd
commit
a0570e7750
|
@ -21,6 +21,9 @@ import sys
|
|||
import string
|
||||
import subprocess
|
||||
import argparse
|
||||
import mmap
|
||||
import struct
|
||||
import os
|
||||
|
||||
if sys.version_info.major < 3:
|
||||
# Simulate Python 3.x behaviour of defaulting to UTF-8 for print. This is
|
||||
|
@ -31,6 +34,71 @@ if sys.version_info.major < 3:
|
|||
last_access_address = None
|
||||
last_access_tag = None
|
||||
|
||||
# Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
# and only parses what is necessary to find the build ids. It uses a memoryview
# into an mmap to avoid copying.
#
# All helpers treat the input as untrusted: a truncated or malformed file
# yields None instead of raising, so one bad file cannot abort a directory scan.

# ELF64 file header: total size and the offsets of the two fields we read.
Ehdr_size = 64
e_shnum_offset = 60   # number of section headers (uint16)
e_shoff_offset = 40   # file offset of the section header table (uint64)

# ELF64 section header: total size and the offsets of the fields we read.
Shdr_size = 64
sh_type_offset = 4    # section type (uint32)
sh_offset_offset = 24  # file offset of the section contents (uint64)
sh_size_offset = 32   # size of the section contents in bytes (uint64)
SHT_NOTE = 7

# Note header: three uint32 fields (namesz, descsz, type).
Nhdr_size = 12
NT_GNU_BUILD_ID = 3


def align_up(size, alignment):
    """Round size up to the next multiple of alignment.

    alignment must be a power of two, because the rounding is done by
    masking off the low bits.
    """
    return (size + alignment - 1) & ~(alignment - 1)


def handle_Nhdr(mv, sh_size):
    """Search the contents of one note section for a GNU build id.

    Args:
      mv: buffer (memoryview) positioned at the start of the note section.
      sh_size: size of the section as declared by its section header.

    Returns:
      The build id as a lowercase hex string, or None if the section holds
      no complete GNU build-id note.
    """
    # The declared size comes from the file and may exceed the data we
    # actually have; never read past the end of the view.
    end = min(sh_size, len(mv))
    offset = 0
    while offset + Nhdr_size <= end:
        n_namesz, n_descsz, n_type = struct.unpack_from('<III', mv, offset)
        if n_type == NT_GNU_BUILD_ID and n_namesz == 4:
            name_start = offset + Nhdr_size
            desc_start = name_start + 4
            desc_end = desc_start + n_descsz
            # Require the whole descriptor to be present: a truncated id
            # would never match the id printed in a report.
            if mv[name_start:desc_start] == b"GNU\x00" and desc_end <= end:
                return mv[desc_start:desc_end].hex()
        # Name and descriptor are each padded to a 4-byte boundary.
        offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
    return None


def handle_Shdr(mv):
    """Extract the location of a note section from one section header.

    Args:
      mv: buffer (memoryview) positioned at the start of a section header.

    Returns:
      (sh_offset, sh_size) when the header describes an SHT_NOTE section,
      otherwise (None, None). A header too short to contain the fields read
      here is also reported as (None, None).
    """
    # sh_size is the last field we read; it is 8 bytes wide.
    if len(mv) < sh_size_offset + 8:
        return None, None
    sh_type, = struct.unpack_from('<I', mv, sh_type_offset)
    if sh_type != SHT_NOTE:
        return None, None
    sh_offset, = struct.unpack_from('<Q', mv, sh_offset_offset)
    sh_size, = struct.unpack_from('<Q', mv, sh_size_offset)
    return sh_offset, sh_size


def handle_elf(mv):
    """Return the GNU build id of an ELF image, or None.

    Args:
      mv: buffer (memoryview) over the whole file.

    Returns:
      The build id as a lowercase hex string; None if the data is not a
      64-bit little-endian ELF file or contains no build-id note.
    """
    # \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
    # 64-bit little endian platforms (x86_64 and ARM64). If this changes, we will
    # have to extend the parsing code.
    if len(mv) < Ehdr_size or mv[:6] != b'\x7fELF\x02\x01':
        return None
    e_shnum, = struct.unpack_from('<H', mv, e_shnum_offset)
    e_shoff, = struct.unpack_from('<Q', mv, e_shoff_offset)
    for i in range(e_shnum):
        start = e_shoff + i * Shdr_size
        # Slicing clamps to the buffer, so a header table pointing past the
        # end of the file just produces short views that are rejected.
        sh_offset, sh_size = handle_Shdr(mv[start:start + Shdr_size])
        if sh_offset is None:
            continue
        note_hdr = mv[sh_offset:sh_offset + sh_size]
        result = handle_Nhdr(note_hdr, sh_size)
        if result is not None:
            return result
    return None


def get_buildid(filename):
    """Return the GNU build id of the file at filename, or None.

    None is returned for files that are too small to be ELF, are not 64-bit
    little-endian ELF, or carry no build-id note. Errors opening or mapping
    the file (OSError) propagate to the caller.
    """
    # Binary mode: the file is only mmapped, never decoded as text.
    with open(filename, "rb") as fd:
        # mmap cannot map an empty file, and anything shorter than the ELF
        # header cannot be a valid ELF anyway.
        if os.fstat(fd.fileno()).st_size < Ehdr_size:
            return None
        with mmap.mmap(fd.fileno(), 0, access=mmap.ACCESS_READ) as m:
            with memoryview(m) as mv:
                return handle_elf(mv)
|
||||
|
||||
class Symbolizer:
|
||||
def __init__(self, path, binary_prefixes, paths_to_cut):
|
||||
self.__pipe = None
|
||||
|
@ -39,6 +107,7 @@ class Symbolizer:
|
|||
self.__paths_to_cut = paths_to_cut
|
||||
self.__log = False
|
||||
self.__warnings = set()
|
||||
self.__index = {}
|
||||
|
||||
def enable_logging(self, enable):
|
||||
self.__log = enable
|
||||
|
@ -77,9 +146,12 @@ class Symbolizer:
|
|||
file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
|
||||
return file_name
|
||||
|
||||
def __process_binary_name(self, name):
|
||||
def __process_binary_name(self, name, buildid=None):
|
||||
if name.startswith('/'):
|
||||
name = name[1:]
|
||||
if buildid is not None and buildid in self.__index:
|
||||
return self.__index[buildid]
|
||||
|
||||
for p in self.__binary_prefixes:
|
||||
full_path = os.path.join(p, name)
|
||||
if os.path.exists(full_path):
|
||||
|
@ -121,10 +193,10 @@ class Symbolizer:
|
|||
except Symbolizer.__EOF:
|
||||
pass
|
||||
|
||||
def iter_call_stack(self, binary, addr):
|
||||
def iter_call_stack(self, binary, buildid, addr):
|
||||
self.__open_pipe()
|
||||
p = self.__pipe
|
||||
binary = self.__process_binary_name(binary)
|
||||
binary = self.__process_binary_name(binary, buildid)
|
||||
if not binary:
|
||||
return
|
||||
self.__write("CODE %s %s" % (binary, addr))
|
||||
|
@ -137,15 +209,25 @@ class Symbolizer:
|
|||
except Symbolizer.__EOF:
|
||||
pass
|
||||
|
||||
def build_index(self):
    """Populate the build-id index from every file under the binary prefixes.

    Walks each directory in the configured binary prefixes, asks
    get_buildid() for the build id of every file found, and records
    build id -> file path. When two files share a build id, the one
    visited last wins.

    Files that cannot be opened, mapped or parsed are skipped so that a
    single bad entry does not abort indexing of the whole tree.
    """
    for p in self.__binary_prefixes:
        for dname, _, fnames in os.walk(p):
            for fn in fnames:
                filename = os.path.join(dname, fn)
                try:
                    bid = get_buildid(filename)
                except (OSError, ValueError, struct.error) as e:
                    # os.walk also reports dangling symlinks and files we
                    # may not read; mmap and struct can reject odd files.
                    # None of these can be a usable symbol file.
                    if self.__log:
                        sys.stderr.write(
                            "Skipping %s while indexing: %s\n" % (filename, e))
                    continue
                if bid is not None:
                    self.__index[bid] = filename
|
||||
|
||||
def symbolize_line(line, symbolizer_path):
|
||||
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
|
||||
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
|
||||
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
|
||||
if match:
|
||||
frameno = match.group(2)
|
||||
binary = match.group(5)
|
||||
addr = int(match.group(6), 16)
|
||||
buildid = match.group(7)
|
||||
|
||||
frames = list(symbolizer.iter_call_stack(binary, addr))
|
||||
frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
|
||||
|
||||
if len(frames) > 0:
|
||||
print("%s#%s%s%s in %s" % (match.group(1), match.group(2),
|
||||
|
@ -210,6 +292,7 @@ parser.add_argument('-v', action='store_true')
|
|||
parser.add_argument('--ignore-tags', action='store_true')
|
||||
parser.add_argument('--symbols', action='append')
|
||||
parser.add_argument('--source', action='append')
|
||||
parser.add_argument('--index', action='store_true')
|
||||
parser.add_argument('--symbolizer')
|
||||
parser.add_argument('args', nargs=argparse.REMAINDER)
|
||||
args = parser.parse_args()
|
||||
|
@ -297,6 +380,8 @@ if args.v:
|
|||
|
||||
symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
|
||||
symbolizer.enable_logging(args.d)
|
||||
if args.index:
|
||||
symbolizer.build_index()
|
||||
|
||||
for line in sys.stdin:
|
||||
if sys.version_info.major < 3:
|
||||
|
|
Loading…
Reference in New Issue