llvm-project/lld/test/MachO/tools/validate-unwind-info.py

#!/usr/bin/env python

"""Validate compact unwind info by cross checking the llvm-objdump
reports of the input object file vs final linked output.
"""
from __future__ import print_function
import sys
import argparse
import re
from pprint import pprint

def main():
  """Cross-check compact unwind encodings of object files vs. linked output.

  The input is the concatenated text of
  `llvm-objdump --unwind-info --syms` for the object file(s) plus the final
  linker output. It is read from the FILES given on the command line, or
  from stdin when no files are given.

  Four things are scraped from that text with regular expressions:
    * per-symbol compact encodings from the object file(s),
    * the program's global __TEXT,__text function symbols (xADDRESS or _main),
    * the program's common-encodings table (indexes 0..126),
    * the program's per-function encodings.

  Adjacent object-file entries with identical encodings are folded, and the
  result must equal the program's per-function encodings. The folded
  encodings, ordered by descending (frequency, encoding) and capped at 127
  entries, must equal the program's common-encodings table.

  Returns None on success. Calls sys.exit() with a diagnostic message when
  any of the four sections is missing from the input or when a comparison
  fails. Pass --debug to print the details of the fold and of any mismatch.
  """
  # Raw string: "\d" in a plain string literal is an invalid escape sequence.
  hex_digit = r"[a-f\d]"
  hex8 = hex_digit + "{8}"

  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument('files', metavar='FILES', nargs='*',
                      help='output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output')
  parser.add_argument('--debug', action='store_true')
  args = parser.parse_args()

  if args.files:
    chunks = []
    for path in args.files:
      # Close each input promptly instead of leaking the file handle.
      with open(path) as f:
        chunks.append(f.read())
    objdump_string = ''.join(chunks)
  else:
    objdump_string = sys.stdin.read()

  # Each match is (symbol, encoding, personality, lsda); personality and lsda
  # are empty strings when the optional trailing group does not match.
  object_encodings_list = re.findall(
      r"start:\s+0x%s+\s+(\w+)\s+" % hex_digit +
      r"length:\s+0x%s+\s+" % hex_digit +
      r"compact encoding:\s+0x(%s+)(?:\s+" % hex_digit +
      r"personality function:\s+0x(%s+)\s+\w+\s+" % hex_digit +
      r"LSDA:\s+0x(%s+)\s+\w+(?: \+ 0x%s+)?)?" % (hex_digit, hex_digit),
      objdump_string, re.DOTALL)
  object_encodings_map = {symbol: encoding
                          for symbol, encoding, _, _ in object_encodings_list}
  if not object_encodings_map:
    sys.exit("no object encodings found in input")

  # generate-cfi-funcs.py doesn't generate unwind info for _main.
  object_encodings_map['_main'] = '00000000'

  # Maps the low 8 hex digits of a symbol's address to its name. Only symbols
  # named "x<those 8 digits>" or "_main" are accepted.
  program_symbols_map = {
      address: symbol
      for address, symbol in
      re.findall(r"^%s(%s) g\s+F __TEXT,__text (x\1|_main)$" % (hex8, hex8),
                 objdump_string, re.MULTILINE)}
  if not program_symbols_map:
    sys.exit("no program symbols found in input")

  # Only indexes 0..126 are matched, i.e. the 127-entry common table.
  program_common_encodings = re.findall(
      r"^\s+encoding\[(?:\d|\d\d|1[01]\d|12[0-6])\]: 0x(%s+)$" % hex_digit,
      objdump_string, re.MULTILINE)
  if not program_common_encodings:
    sys.exit("no common encodings found in input")

  # Raises KeyError if a function offset has no matching program symbol.
  program_encodings_map = {
      program_symbols_map[address]: encoding
      for address, encoding in
      re.findall(r"^\s+\[\d+\]: function offset=0x(%s+), " % hex_digit +
                 r"encoding(?:\[\d+\])?=0x(%s+)$" % hex_digit,
                 objdump_string, re.MULTILINE)}
  # Check the program map here (the original re-checked the object map, so
  # this diagnostic could never be reported).
  if not program_encodings_map:
    sys.exit("no program encodings found in input")

  # Fold adjacent entries from the object file that have matching encodings
  # TODO(gkm) add check for personality+lsda
  previous_encoding = None  # never equal to an encoding string
  # sorted() snapshots the keys, so deleting from the dict in the loop is safe.
  for symbol in sorted(object_encodings_map):
    encoding = object_encodings_map[symbol]
    fold = (encoding == previous_encoding)
    if fold:
      del object_encodings_map[symbol]
    if args.debug:
      print("%s %s with %s" % (
              'delete' if fold else 'retain', symbol, encoding))
    previous_encoding = encoding

  if program_encodings_map != object_encodings_map:
    if args.debug:
      print("program encodings map:")
      pprint(program_encodings_map)
      print("object encodings map:")
      pprint(object_encodings_map)
    sys.exit("encoding maps differ")

  # Count frequency of object-file folded encodings
  # and compare with the program-file common encodings table
  encoding_frequency_map = {}
  for encoding in object_encodings_map.values():
    encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)
  # Most frequent first; ties are broken by the larger encoding string first.
  encoding_frequencies = sorted(
      encoding_frequency_map,
      key=lambda x: (encoding_frequency_map.get(x), x),
      reverse=True)
  # The common encodings table holds only 127 entries.
  del encoding_frequencies[127:]

  if program_common_encodings != encoding_frequencies:
    if args.debug:
      # Print headers with print(); pprint() of a string would show its repr.
      print("program common encodings:")
      pprint(program_common_encodings)
      print("object encoding frequencies:")
      pprint(encoding_frequencies)
    sys.exit("encoding frequencies differ")


# Run the validation only when this file is executed as a script, so that
# importing it does not parse arguments or read stdin.
if __name__ == '__main__':
  main()
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`#!/usr/bin/env python`

			`"""Validate compact unwind info by cross checking the llvm-objdump`
			`reports of the input object file vs final linked output.`
			`"""`
lld/mach-o: Make tool scripts from 2124ca1d5cb py2.7-compatible 2020-09-19 21:17:02 +08:00			`from __future__ import print_function`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`import sys`
			`import argparse`
			`import re`
			`from pprint import pprint`

			`def main():`
			`hex = "[a-f\d]"`
			`hex8 = hex + "{8}"`

			`parser = argparse.ArgumentParser(description=__doc__)`
			`parser.add_argument('files', metavar='FILES', nargs='*',`
			`help='output of (llvm-objdump --unwind-info --syms) for object file(s) plus final linker output')`
			`parser.add_argument('--debug', action='store_true')`
			`args = parser.parse_args()`

			`if args.files:`
			`objdump_string = ''.join([open(f).read() for f in args.files])`
			`else:`
			`objdump_string = sys.stdin.read()`

			`object_encodings_list = [(symbol, encoding, personality, lsda)`
			`for symbol, encoding, personality, lsda in`
lld/mach-o: Make tool scripts from 2124ca1d5cb py2.7-compatible 2020-09-19 21:17:02 +08:00			`re.findall(r"start:\s+0x%s+\s+(\w+)\s+" % hex +`
			`r"length:\s+0x%s+\s+" % hex +`
			`r"compact encoding:\s+0x(%s+)(?:\s+" % hex +`
			`r"personality function:\s+0x(%s+)\s+\w+\s+" % hex +`
			`r"LSDA:\s+0x(%s+)\s+\w+(?: \+ 0x%s+)?)?" % (hex, hex),`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`objdump_string, re.DOTALL)]`
			`object_encodings_map = {symbol:encoding`
			`for symbol, encoding, _, _ in object_encodings_list}`
			`if not object_encodings_map:`
			`sys.exit("no object encodings found in input")`

[lld/mac] Add explicit "no unwind info" entries for functions without unwind info Fixes PR50529. With this, lld-linked Chromium base_unittests passes on arm macs. Surprisingly, no measurable impact on link time. Differential Revision: https://reviews.llvm.org/D104681 2021-06-22 10:29:11 +08:00			`# generate-cfi-funcs.py doesn't generate unwind info for _main.`
			`object_encodings_map['_main'] = '00000000'`

[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`program_symbols_map = {address:symbol`
			`for address, symbol in`
[lld/mac] Add explicit "no unwind info" entries for functions without unwind info Fixes PR50529. With this, lld-linked Chromium base_unittests passes on arm macs. Surprisingly, no measurable impact on link time. Differential Revision: https://reviews.llvm.org/D104681 2021-06-22 10:29:11 +08:00			`re.findall(r"^%s(%s) g\s+F __TEXT,__text (x\1\|_main)$" % (hex8, hex8),`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`objdump_string, re.MULTILINE)}`
			`if not program_symbols_map:`
			`sys.exit("no program symbols found in input")`

			`program_common_encodings = (`
Handle overflow beyond the 127 common encodings limit The common encodings table holds only 127 entries. The encodings index for compact entries is 8 bits wide, and indexes 127..255 are stored locally to each second-level page. Prior to this diff, lld would `fatal()` if encodings overflowed the 127 limit. This diff populates a per-second-level-page encodings table as needed. When the per-page encodings table hits its limit, we must terminate the page. If such early termination would consume fewer entries than a regular (non-compact) encoding page, then we prefer the regular format. Caveat: one reason the common-encoding table might overflow is because of DWARF debug-info references, which are not yet implemented and will come with a later diff. Differential Revision: https://reviews.llvm.org/D93267 2020-12-07 14:33:38 +08:00			`re.findall(r"^\s+encoding\[(?:\d\|\d\d\|1[01]\d\|12[0-6])\]: 0x(%s+)$" % hex,`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`objdump_string, re.MULTILINE))`
			`if not program_common_encodings:`
			`sys.exit("no common encodings found in input")`

			`program_encodings_map = {program_symbols_map[address]:encoding`
			`for address, encoding in`
lld/mach-o: Make tool scripts from 2124ca1d5cb py2.7-compatible 2020-09-19 21:17:02 +08:00			`re.findall(r"^\s+\[\d+\]: function offset=0x(%s+), " % hex +`
Handle overflow beyond the 127 common encodings limit The common encodings table holds only 127 entries. The encodings index for compact entries is 8 bits wide, and indexes 127..255 are stored locally to each second-level page. Prior to this diff, lld would `fatal()` if encodings overflowed the 127 limit. This diff populates a per-second-level-page encodings table as needed. When the per-page encodings table hits its limit, we must terminate the page. If such early termination would consume fewer entries than a regular (non-compact) encoding page, then we prefer the regular format. Caveat: one reason the common-encoding table might overflow is because of DWARF debug-info references, which are not yet implemented and will come with a later diff. Differential Revision: https://reviews.llvm.org/D93267 2020-12-07 14:33:38 +08:00			`r"encoding(?:\[\d+\])?=0x(%s+)$" % hex,`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`objdump_string, re.MULTILINE)}`
			`if not object_encodings_map:`
			`sys.exit("no program encodings found in input")`

			`# Fold adjacent entries from the object file that have matching encodings`
			`# TODO(gkm) add check for personality+lsda`
			`encoding0 = 0`
			`for symbol in sorted(object_encodings_map):`
			`encoding = object_encodings_map[symbol]`
			`fold = (encoding == encoding0)`
			`if fold:`
			`del object_encodings_map[symbol]`
			`if args.debug:`
lld/mach-o: Make tool scripts from 2124ca1d5cb py2.7-compatible 2020-09-19 21:17:02 +08:00			`print("%s %s with %s" % (`
			`'delete' if fold else 'retain', symbol, encoding))`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`encoding0 = encoding`

			`if program_encodings_map != object_encodings_map:`
			`if args.debug:`
[lld-macho] Emit personalities in compact unwind Note that there is a triple indirection involved with personalities and compact unwind: 1. Two bits of each CU encoding are used as an offset into the personality array. 2. Each entry of the personality array is an offset from the image base. The resulting address (after adding the image base) should point within the GOT. 3. The corresponding GOT entry contains the actual pointer to the personality function. To further complicate things, when the personality function is in the object file (as opposed to a dylib), its references in `__compact_unwind` may refer to it via a section + offset relocation instead of a symbol relocation. Since our GOT implementation can only create entries for symbols, we have to create a synthetic symbol at the given section offset. Reviewed By: clayborg Differential Revision: https://reviews.llvm.org/D95809 2021-02-09 02:47:33 +08:00			`print("program encodings map:")`
			`pprint(program_encodings_map)`
			`print("object encodings map:")`
			`pprint(object_encodings_map)`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`sys.exit("encoding maps differ")`

			`# Count frequency of object-file folded encodings`
			`# and compare with the program-file common encodings table`
			`encoding_frequency_map = {}`
			`for _, encoding in object_encodings_map.items():`
			`encoding_frequency_map[encoding] = 1 + encoding_frequency_map.get(encoding, 0)`
			`encoding_frequencies = [x for x in`
			`sorted(encoding_frequency_map,`
			`key=lambda x: (encoding_frequency_map.get(x), x),`
			`reverse=True)]`
Handle overflow beyond the 127 common encodings limit The common encodings table holds only 127 entries. The encodings index for compact entries is 8 bits wide, and indexes 127..255 are stored locally to each second-level page. Prior to this diff, lld would `fatal()` if encodings overflowed the 127 limit. This diff populates a per-second-level-page encodings table as needed. When the per-page encodings table hits its limit, we must terminate the page. If such early termination would consume fewer entries than a regular (non-compact) encoding page, then we prefer the regular format. Caveat: one reason the common-encoding table might overflow is because of DWARF debug-info references, which are not yet implemented and will come with a later diff. Differential Revision: https://reviews.llvm.org/D93267 2020-12-07 14:33:38 +08:00			`del encoding_frequencies[127:]`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00
			`if program_common_encodings != encoding_frequencies:`
			`if args.debug:`
Handle overflow beyond the 127 common encodings limit The common encodings table holds only 127 entries. The encodings index for compact entries is 8 bits wide, and indexes 127..255 are stored locally to each second-level page. Prior to this diff, lld would `fatal()` if encodings overflowed the 127 limit. This diff populates a per-second-level-page encodings table as needed. When the per-page encodings table hits its limit, we must terminate the page. If such early termination would consume fewer entries than a regular (non-compact) encoding page, then we prefer the regular format. Caveat: one reason the common-encoding table might overflow is because of DWARF debug-info references, which are not yet implemented and will come with a later diff. Differential Revision: https://reviews.llvm.org/D93267 2020-12-07 14:33:38 +08:00			`pprint("program common encodings:\n" + str(program_common_encodings))`
			`pprint("object encoding frequencies:\n" + str(encoding_frequencies))`
[lld-macho] create __TEXT,__unwind_info from __LD,__compact_unwind Digest the input `__LD,__compact_unwind` and produce the output `__TEXT,__unwind_info`. This is the initial commit with the major functionality. Successor commits will add handling for ... * `__TEXT,__eh_frame` * personalities & LSDA * `-r` pass-through Differential Revision: https://reviews.llvm.org/D86805 2020-08-21 04:05:13 +08:00			`sys.exit("encoding frequencies differ")`


			`if __name__ == '__main__':`
			`main()`