2020-08-21 04:05:13 +08:00
|
|
|
#!/usr/bin/env python
|
|
|
|
|
|
|
|
"""Validate compact unwind info by cross checking the llvm-objdump
|
|
|
|
reports of the input object file vs final linked output.
|
|
|
|
"""
|
2020-09-19 21:17:02 +08:00
|
|
|
from __future__ import print_function
|
2020-08-21 04:05:13 +08:00
|
|
|
import sys
|
|
|
|
import argparse
|
|
|
|
import re
|
|
|
|
from pprint import pprint
|
|
|
|
|
|
|
|
def main():
    """Cross-check compact unwind encodings of object file(s) vs. linked output.

    Reads the text named by the FILES arguments (concatenated in order), or
    stdin when no files are given, and extracts four things from it with
    regular expressions:

    * per-symbol compact encodings from the object-file unwind entries,
    * the program's function symbols (address -> name),
    * the program's common-encodings table,
    * the program's per-function encodings.

    The object-side encodings are folded and ranked by frequency, then
    compared with the program-side data.

    Exits through sys.exit() with a message when a section is missing from
    the input or when the two sides disagree; returns None when everything
    matches.  With --debug, fold decisions and mismatching tables are printed
    to stdout.
    """
    # Raw string so that "\d" reaches the regex engine untouched; named
    # hex_digit so the hex() builtin is not shadowed.
    hex_digit = r"[a-f\d]"
    hex8 = hex_digit + "{8}"
    # The common-encodings regex below accepts indices 0..126 only, so at
    # most this many ranked encodings can be compared against that table.
    max_common_encodings = 127

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('files', metavar='FILES', nargs='*',
                        help='output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args()

    if args.files:
        chunks = []
        for path in args.files:
            # Close each input promptly instead of leaking the handle.
            with open(path) as objdump_file:
                chunks.append(objdump_file.read())
        objdump_string = ''.join(chunks)
    else:
        objdump_string = sys.stdin.read()

    # Object-file unwind entries.  Each match yields
    # (symbol, encoding, personality, lsda); the personality/LSDA part is
    # optional and comes back as empty strings when absent.
    object_entry_pattern = (
        r"start:\s+0x%s+\s+(\w+)\s+" % hex_digit +
        r"length:\s+0x%s+\s+" % hex_digit +
        r"compact encoding:\s+0x(%s+)(?:\s+" % hex_digit +
        r"personality function:\s+0x(%s+)\s+\w+\s+" % hex_digit +
        r"LSDA:\s+0x(%s+)\s+\w+(?: \+ 0x%s+)?)?" % (hex_digit, hex_digit))
    object_encodings_list = re.findall(object_entry_pattern, objdump_string)
    object_encodings_map = {symbol: encoding
                            for symbol, encoding, _, _ in object_encodings_list}
    if not object_encodings_map:
        sys.exit("no object encodings found in input")

    # generate-cfi-funcs.py doesn't generate unwind info for _main.
    object_encodings_map['_main'] = '00000000'

    # Program symbol-table lines.  Only symbols named "x" followed by the low
    # eight hex digits of their own address, or "_main", are accepted.
    program_symbols_map = {
        address: symbol
        for address, symbol in
        re.findall(r"^%s(%s) g\s+F __TEXT,__text (x\1|_main)$" % (hex8, hex8),
                   objdump_string, re.MULTILINE)}
    if not program_symbols_map:
        sys.exit("no program symbols found in input")

    program_common_encodings = re.findall(
        r"^\s+encoding\[(?:\d|\d\d|1[01]\d|12[0-6])\]: 0x(%s+)$" % hex_digit,
        objdump_string, re.MULTILINE)
    if not program_common_encodings:
        sys.exit("no common encodings found in input")

    # Per-function entries of the program, keyed by symbol name via the
    # address -> symbol table built above.
    program_encodings_map = {
        program_symbols_map[address]: encoding
        for address, encoding in
        re.findall(r"^\s+\[\d+\]: function offset=0x(%s+), " % hex_digit +
                   r"encoding(?:\[\d+\])?=0x(%s+)$" % hex_digit,
                   objdump_string, re.MULTILINE)}
    # Check the map that was just built (the original re-tested
    # object_encodings_map here, so this message could never be emitted).
    if not program_encodings_map:
        sys.exit("no program encodings found in input")

    # Fold adjacent entries from the object file that have matching encodings
    # TODO(gkm) add check for personality+lsda
    previous_encoding = None
    # sorted() returns a fresh list, so deleting from the dict inside the
    # loop is safe.
    for symbol in sorted(object_encodings_map):
        encoding = object_encodings_map[symbol]
        fold = (encoding == previous_encoding)
        if fold:
            del object_encodings_map[symbol]
        if args.debug:
            print("%s %s with %s" % (
                'delete' if fold else 'retain', symbol, encoding))
        previous_encoding = encoding

    if program_encodings_map != object_encodings_map:
        if args.debug:
            print("program encodings map:")
            pprint(program_encodings_map)
            print("object encodings map:")
            pprint(object_encodings_map)
        sys.exit("encoding maps differ")

    # Count frequency of object-file folded encodings
    # and compare with the program-file common encodings table
    encoding_frequency_map = {}
    for encoding in object_encodings_map.values():
        encoding_frequency_map[encoding] = (
            1 + encoding_frequency_map.get(encoding, 0))
    # Most frequent first; ties are broken by the encoding string itself,
    # also in descending order.
    encoding_frequencies = sorted(
        encoding_frequency_map,
        key=lambda enc: (encoding_frequency_map[enc], enc),
        reverse=True)
    del encoding_frequencies[max_common_encodings:]

    if program_common_encodings != encoding_frequencies:
        if args.debug:
            # Print the header and the table separately; pprint() of a
            # concatenated string would only show its quoted repr.
            print("program common encodings:")
            pprint(program_common_encodings)
            print("object encoding frequencies:")
            pprint(encoding_frequencies)
        sys.exit("encoding frequencies differ")
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|