[HWASan] Clean up hwasan_symbolize.

The globals are better expressed as members of the Symbolizer, and all
functions operating on it should be methods instead.

Also use the standard idiom of wrapping the main code in
`if __name__ == '__main__'`.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D125032
This commit is contained in:
Florian Mayer 2022-05-06 15:44:51 -07:00
parent d8564dcbcf
commit 68cd47e0ca
1 changed file with 176 additions and 173 deletions

View File

@ -31,9 +31,6 @@ if sys.version_info.major < 3:
import codecs
sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
last_access_address = None
last_access_tag = None
# Below, a parser for a subset of ELF. It only supports 64 bit, little-endian,
# and only parses what is necessary to find the build ids. It uses a memoryview
# into an mmap to avoid copying.
@ -110,6 +107,8 @@ class Symbolizer:
self.__index = {}
self.__link_prefixes = []
self.__html = False
self.__last_access_address = None
self.__last_access_tag = None
def enable_html(self, enable):
self.__html = enable
@ -268,7 +267,7 @@ class Symbolizer:
if bid is not None:
self.__index[bid] = filename
def symbolize_line(line, symbolizer_path):
def symbolize_line(self, line):
#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) (BuildId: 4abce4cd41ea5c2f34753297b7e774d9)
match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)'
r'(?:\s*\(BuildId: ([0-9a-f]+)\))?', line, re.UNICODE)
@ -278,37 +277,36 @@ def symbolize_line(line, symbolizer_path):
addr = int(match.group(6), 16)
buildid = match.group(7)
frames = list(symbolizer.iter_call_stack(binary, buildid, addr))
frames = list(self.iter_call_stack(binary, buildid, addr))
if len(frames) > 0:
symbolizer.print(
symbolizer.maybe_escape(
self.print(
self.maybe_escape(
"%s#%s%s%s in " % (match.group(1), match.group(2), match.group(3),
frames[0][0])
) + symbolizer.maybe_linkify(frames[0][1]),
) + self.maybe_linkify(frames[0][1]),
escape=False)
for i in range(1, len(frames)):
space1 = ' ' * match.end(1)
space2 = ' ' * (match.start(4) - match.end(1) - 2)
symbolizer.print(
symbolizer.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
+ symbolizer.maybe_linkify(frames[i][1]), escape=False)
self.print(
self.maybe_escape("%s->%s%s in " % (space1, space2, frames[i][0]))
+ self.maybe_linkify(frames[i][1]), escape=False)
else:
symbolizer.print(line.rstrip())
self.print(line.rstrip())
else:
symbolizer.print(line.rstrip())
self.print(line.rstrip())
def save_access_address(line):
global last_access_address, last_access_tag
def save_access_address(self, line):
match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
if match:
last_access_address = int(match.group(2), 16)
self.__last_access_address = int(match.group(2), 16)
match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
if match:
last_access_tag = int(match.group(2), 16)
self.__last_access_tag = int(match.group(2), 16)
def process_stack_history(line, symbolizer, ignore_tags=False):
if last_access_address is None or last_access_tag is None:
def process_stack_history(self, line, ignore_tags=False):
if self.__last_access_address is None or self.__last_access_tag is None:
return
if re.match(r'Previously allocated frames:', line, re.UNICODE):
return True
@ -327,24 +325,32 @@ def process_stack_history(line, symbolizer, ignore_tags=False):
fp = (record >> 48) << 4
pc = record & pc_mask
for local in symbolizer.iter_locals(binary, addr, buildid):
for local in self.iter_locals(binary, addr, buildid):
frame_offset = local[3]
size = local[4]
if frame_offset is None or size is None:
continue
obj_offset = (last_access_address - fp - frame_offset) & fp_mask
obj_offset = (self.__last_access_address - fp - frame_offset) & fp_mask
if obj_offset >= size:
continue
tag_offset = local[5]
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != self.__last_access_tag):
continue
symbolizer.print('')
symbolizer.print('Potentially referenced stack object:')
symbolizer.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
symbolizer.print(' at %s' % (local[1],))
self.print('')
self.print('Potentially referenced stack object:')
self.print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
self.print(' at %s' % (local[1],))
return True
return False
def extract_version(s):
    """Return the numeric suffix that follows the last '-' in ``s``.

    Args:
        s: A string such as ``"llvm-symbolizer-14"``.
            NOTE(review): presumably a path matched by the
            ``llvm-symbolizer-*`` glob; verify against the caller.

    Returns:
        The text after the last '-' converted with ``float()``. Returns 0
        when ``s`` contains no '-' or when that text is not a valid float
        (for example ``"14.0.0"``, an empty suffix, or a non-numeric word).
    """
    _, dash, suffix = s.rpartition('-')
    if not dash:
        return 0
    try:
        return float(suffix)
    except ValueError:
        # A suffix that float() rejects is treated like a missing version
        # instead of aborting the whole script with a traceback.
        return 0
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-d', action='store_true')
parser.add_argument('-v', action='store_true')
@ -410,13 +416,6 @@ if not symbolizer_path:
symbolizer_path = p
break
def extract_version(s):
    """Return the numeric suffix that follows the last '-' in ``s``.

    Args:
        s: A string such as ``"llvm-symbolizer-14"``.
            NOTE(review): presumably a path matched by the
            ``llvm-symbolizer-*`` glob; verify against the caller.

    Returns:
        The text after the last '-' converted with ``float()``. Returns 0
        when ``s`` contains no '-' or when that text is not a valid float
        (for example ``"14.0.0"``, an empty suffix, or a non-numeric word).
    """
    _, dash, suffix = s.rpartition('-')
    if not dash:
        return 0
    try:
        return float(suffix)
    except ValueError:
        # A suffix that float() rejects is treated like a missing version
        # instead of aborting the whole script with a traceback.
        return 0
if not symbolizer_path:
for path in os.environ["PATH"].split(os.pathsep):
candidates = glob.glob(os.path.join(path, 'llvm-symbolizer-*'))
@ -454,7 +453,11 @@ if args.linkify:
for line in sys.stdin:
if sys.version_info.major < 3:
line = line.decode('utf-8')
save_access_address(line)
if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
symbolizer.save_access_address(line)
if symbolizer.process_stack_history(line, ignore_tags=args.ignore_tags):
continue
symbolize_line(line, symbolizer_path)
symbolizer.symbolize_line(line)
if __name__ == '__main__':
main()