llvm-project/llvm-libgcc/generate_version_script.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

132 lines
4.4 KiB
Python
Raw Normal View History

[llvm-libgcc] initial commit Note: the term "libgcc" refers to all of `libgcc.a`, `libgcc_eh.a`, and `libgcc_s.so`. Enabling libunwind as a replacement for libgcc on Linux has proven to be challenging since libgcc_s.so is a required dependency in the [Linux standard base][5]. Some software is transitively dependent on libgcc because glibc makes hardcoded calls to functions in libgcc_s. For example, the function `__GI___backtrace` eventually makes its way to a [hardcoded dlopen to libgcc_s' _Unwind_Backtrace][1]. Since libgcc_{eh.a,s.so} and libunwind have the same ABI, but different implementations, the two libraries end up [cross-talking, which ultimately results in a segfault][2]. To solve this problem, libunwind needs to build a “libgcc”. That is, link the necessary functions from compiler-rt and libunwind into an archive and shared object that advertise themselves as `libgcc.a`, `libgcc_eh.a`, and `libgcc_s.so`, so that glibc’s baked calls are diverted to the correct objects in memory. Fortunately for us, compiler-rt and libunwind use the same ABI as the libgcc family, so the problem is solvable at the llvm-project configuration level: no program source needs to be edited. Thus, the end result is for a user to configure their LLVM build with a flag that indicates they want to archive compiler-rt/unwind as libgcc. We achieve this by compiling libunwind with all the symbols necessary for compiler-rt to emulate the libgcc family, and then generating symlinks named for our "libgcc" that point to their corresponding libunwind counterparts. We alternatively considered patching glibc so that the source doesn't directly refer to libgcc, but rather _defaults_ to libgcc, so that a system preferring compiler-rt/libunwind can point to these libraries at the config stage instead. Even if we modified the Linux standard base, this alternative won't work because binaries that are built using libgcc will still end up having crosstalk between the differing implementations.
This problem has been solved in this manner for [FreeBSD][3], and this CL has been tested against [Chrome OS][4]. [1]: https://github.com/bminor/glibc/blob/master/sysdeps/arm/backtrace.c#L68 [2]: https://bugs.chromium.org/p/chromium/issues/detail?id=1162190#c16 [3]: https://github.com/freebsd/freebsd-src/tree/main/lib/libgcc_s [4]: https://chromium-review.googlesource.com/c/chromiumos/overlays/chromiumos-overlay/+/2945947 [5]: https://refspecs.linuxbase.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/libgcc-s.html Differential Revision: https://reviews.llvm.org/D108416
2021-08-18 04:23:22 +08:00
#!/usr/bin/env python3
# Generates a version script for an architecture so that it can be incorporated
# into gcc_s.ver.
from collections import defaultdict
from itertools import chain
import argparse, subprocess, sys, os
def split_suffix(symbol):
    """
    Splits a symbol such as `__gttf2@GCC_3.0` into a triple representing its
    function name (__gttf2), version name (GCC_3.0), and version number (300).

    The version number acts as a priority. Since earlier versions are more
    accessible and are likely to be used more, the lower the number is, the
    higher its priority. A symbol that has a '@@' instead of '@' has been
    designated by the linker as the default symbol, and is awarded a priority
    of -1.

    The version number is a positional encoding of the dotted components,
    padded to three places: `3.0` -> 300, `3.3.1` -> 331, `11.0` -> 1100.
    Each component is weighted as a whole number, so multi-digit major
    versions such as `GCC_11.0` sort after `GCC_7.0.0` instead of being
    mistaken for version 1.1.0.

    Args:
      symbol: a symbol name, optionally suffixed with `@VERSION` or
        `@@VERSION`.

    Returns:
      None when the symbol carries no version suffix; otherwise the triple
      (function name, version name, priority).

    Raises:
      ValueError: if a non-default version name is not of the form
        `PREFIX_<dotted digits>`.
    """
    if '@' not in symbol:
        return None
    # '@@' leaves an empty string between the two markers; drop empty pieces.
    data = [part for part in symbol.split('@') if part]
    if '@@' in symbol:
        # The default version always wins, so its number need not be parsed.
        return data[0], data[1], -1
    _, version = data[-1].split('_')
    components = [int(part) for part in version.split('.') if part]
    # Pad to major.minor.patch so that `3.0` and `3.0.0` compare equal.
    components += [0] * (3 - len(components))
    priority = 0
    for component in components:
        priority = priority * 10 + component
    return data[0], data[1], priority
def invert_mapping(symbol_map):
    """
    Groups the keys of a symbol -> (version, priority) map by version.

    Returns a list of (version, symbols) pairs ordered by version, where each
    `symbols` list is itself sorted. The priority half of every value is
    ignored.
    """
    by_version = {}
    for name, entry in symbol_map.items():
        version, _priority = entry
        by_version.setdefault(version, []).append(name)
    return [(version, sorted(by_version[version]))
            for version in sorted(by_version)]
def intersection(llvm, gcc):
    """
    Finds the intersection between the symbols extracted from
    compiler-rt.a/libunwind.a and libgcc_s.so.1.

    Args:
      llvm: iterable of unversioned function names provided by the LLVM
        libraries.
      gcc: iterable of function names from libgcc_s, each optionally carrying
        an `@VERSION`/`@@VERSION` suffix. Names without a suffix are skipped.

    Returns:
      The result of `invert_mapping` applied to the common symbols: a list of
      (version name, sorted symbol names) pairs. When a symbol appears under
      several versions, the one with the lowest priority number (as computed
      by `split_suffix`) is kept; on a tie the first one seen wins.
    """
    # Build the lookup set once; testing membership against the raw list
    # would rescan it for every libgcc symbol.
    llvm_symbols = set(llvm)
    common_symbols = {}
    for versioned_symbol in gcc:
        suffix_triple = split_suffix(versioned_symbol)
        if not suffix_triple:
            continue
        symbol, version_name, version_number = suffix_triple
        if symbol not in llvm_symbols:
            continue
        best = common_symbols.get(symbol)
        if best is None or version_number < best[1]:
            common_symbols[symbol] = (version_name, version_number)
    return invert_mapping(common_symbols)
def find_function_names(path):
    """
    Runs `llvm-readelf -su` on `path` and returns the eighth space-separated
    field of every output line that mentions 'FUNC' but not 'UND'.

    This mirrors the shell pipeline
    `llvm-readelf ... | grep 'FUNC' | grep -v 'UND' | awk '{print $8}'`, i.e.
    it is intended to reduce the output to the names of defined functions.

    If llvm-readelf exits with a non-zero status, its stderr is forwarded to
    this process's stderr and the script exits with status 1.
    """
    proc = subprocess.run(args=['llvm-readelf', '-su', path],
                          capture_output=True)
    if proc.returncode != 0:
        print(proc.stderr.decode('utf-8'), file=sys.stderr)
        sys.exit(1)

    names = []
    for line in proc.stdout.decode('utf-8').split('\n'):
        if 'FUNC' not in line or 'UND' in line:
            continue
        # Columns are padded with runs of spaces; drop the empty pieces.
        columns = [field for field in line.split(' ') if field]
        names.append(columns[7])
    return names
def to_file(versioned_symbols):
    """
    Writes the versioned symbols as a version script named
    `new-gcc_s-symbols`, placed in the directory containing this script.

    The file starts with a notice telling the reader not to check it in,
    followed by one `VERSION { symbol; ... };` stanza per (version, symbols)
    pair, each followed by a blank line. An existing file is overwritten.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    notice = ('Do not check this version script in: you should instead work '
              'out which symbols are missing in `lib/gcc_s.ver` and then '
              'integrate them into `lib/gcc_s.ver`. For more information, '
              'please see `doc/LLVMLibgcc.rst`.\n')
    with open(f'{script_dir}/new-gcc_s-symbols', 'w') as out:
        out.write(notice)
        for version, symbols in versioned_symbols:
            entries = ''.join(f' {name};\n' for name in symbols)
            out.write(f'{version} {{\n{entries}}};\n\n')
def read_args():
    """
    Parses the command line and returns the resulting argparse namespace.

    Three string options are accepted, all of them required:
    `--compiler_rt`, `--libunwind`, and `--libgcc_s`.
    """
    # (flag, help text) for every option; all share the same type/required.
    options = (
        ('--compiler_rt', 'Path to `libclang_rt.builtins-${ARCH}.a`.'),
        ('--libunwind', 'Path to `libunwind.a`.'),
        ('--libgcc_s',
         'Path to `libgcc_s.so.1`. Note that unlike the other two arguments, this is a dynamic library.'
         ),
    )
    parser = argparse.ArgumentParser()
    for flag, description in options:
        parser.add_argument(flag, type=str, help=description, required=True)
    return parser.parse_args()
def main():
    """
    Collects the function names from the compiler-rt and libunwind archives
    and from libgcc_s, intersects them, and writes the versioned result out
    via `to_file`.
    """
    args = read_args()
    llvm = [
        *find_function_names(args.compiler_rt),
        *find_function_names(args.libunwind),
    ]
    gcc = find_function_names(args.libgcc_s)
    # TODO(cjdb): work out a way to integrate new symbols in with the existing
    # ones
    to_file(intersection(llvm, gcc))
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()