foundationdb/contrib/Implib.so/implib-gen.py

561 lines
18 KiB
Python
Executable File

#!/usr/bin/python3
# Copyright 2017-2022 Yury Gribov
#
# The MIT License (MIT)
#
# Use of this source code is governed by MIT license that can be
# found in the LICENSE.txt file.
"""
Generates static import library for POSIX shared library
"""
import sys
import os.path
import re
import subprocess
import argparse
import string
import configparser
me = os.path.basename(__file__)
root = os.path.dirname(__file__)
def warn(msg):
"""Emits a nicely-decorated warning."""
sys.stderr.write(f"{me}: warning: {msg}\n")
def error(msg):
"""Emits a nicely-decorated error and exits."""
sys.stderr.write(f"{me}: error: {msg}\n")
sys.exit(1)
def run(args, stdin=""):
"""Runs external program and aborts on error."""
env = os.environ.copy()
# Force English language
env["LC_ALL"] = "c"
try:
del env["LANG"]
except KeyError:
pass
with subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) as p:
out, err = p.communicate(input=stdin.encode("utf-8"))
out = out.decode("utf-8")
err = err.decode("utf-8")
if p.returncode != 0 or err:
error(f"{args[0]} failed with retcode {p.returncode}:\n{err}")
return out, err
def make_toc(words, renames=None):
"Make an mapping of words to their indices in list"
renames = renames or {}
toc = {}
for i, n in enumerate(words):
name = renames.get(n, n)
toc[i] = name
return toc
def parse_row(words, toc, hex_keys):
"Make a mapping from column names to values"
vals = {k: (words[i] if i < len(words) else "") for i, k in toc.items()}
for k in hex_keys:
if vals[k]:
vals[k] = int(vals[k], 16)
return vals
def collect_syms(f):
"""Collect ELF dynamic symtab."""
# --dyn-syms does not always work for some reason so dump all symtabs
out, _ = run(["readelf", "-sW", f])
toc = None
syms = []
syms_set = set()
for line in out.splitlines():
line = line.strip()
if not line:
# Next symtab
toc = None
continue
words = re.split(r" +", line)
if line.startswith("Num"): # Header?
if toc is not None:
error("multiple headers in output of readelf")
# Colons are different across readelf versions so get rid of them.
toc = make_toc(map(lambda n: n.replace(":", ""), words))
elif toc is not None:
sym = parse_row(words, toc, ["Value"])
name = sym["Name"]
if not name:
continue
if name in syms_set:
continue
syms_set.add(name)
sym["Size"] = int(sym["Size"], 0) # Readelf is inconistent on Size format
if "@" in name:
sym["Default"] = "@@" in name
name, ver = re.split(r"@+", name)
sym["Name"] = name
sym["Version"] = ver
else:
sym["Default"] = True
sym["Version"] = None
syms.append(sym)
if toc is None:
error(f"failed to analyze symbols in {f}")
# Also collected demangled names
if syms:
out, _ = run(["c++filt"], "\n".join((sym["Name"] for sym in syms)))
out = out.rstrip("\n") # Some c++filts append newlines at the end
for i, name in enumerate(out.split("\n")):
syms[i]["Demangled Name"] = name
return syms
def collect_relocs(f):
"""Collect ELF dynamic relocs."""
out, _ = run(["readelf", "-rW", f])
toc = None
rels = []
for line in out.splitlines():
line = line.strip()
if not line:
toc = None
continue
if line == "There are no relocations in this file.":
return []
if re.match(r"^\s*Offset", line): # Header?
if toc is not None:
error("multiple headers in output of readelf")
words = re.split(r"\s\s+", line) # "Symbol's Name + Addend"
toc = make_toc(words)
elif toc is not None:
line = re.sub(r" \+ ", "+", line)
words = re.split(r"\s+", line)
rel = parse_row(words, toc, ["Offset", "Info"])
rels.append(rel)
# Split symbolic representation
sym_name = "Symbol's Name + Addend"
if sym_name not in rel and "Symbol's Name" in rel:
# Adapt to different versions of readelf
rel[sym_name] = rel["Symbol's Name"] + "+0"
if rel[sym_name]:
p = rel[sym_name].split("+")
if len(p) == 1:
p = ["", p[0]]
rel[sym_name] = (p[0], int(p[1], 16))
if toc is None:
error(f"failed to analyze relocations in {f}")
return rels
def collect_sections(f):
"""Collect section info from ELF."""
out, _ = run(["readelf", "-SW", f])
toc = None
sections = []
for line in out.splitlines():
line = line.strip()
if not line:
continue
line = re.sub(r"\[\s+", "[", line)
words = re.split(r" +", line)
if line.startswith("[Nr]"): # Header?
if toc is not None:
error("multiple headers in output of readelf")
toc = make_toc(words, {"Addr": "Address"})
elif line.startswith("[") and toc is not None:
sec = parse_row(words, toc, ["Address", "Off", "Size"])
if "A" in sec["Flg"]: # Allocatable section?
sections.append(sec)
if toc is None:
error(f"failed to analyze sections in {f}")
return sections
def read_unrelocated_data(input_name, syms, secs):
"""Collect unrelocated data from ELF."""
data = {}
with open(input_name, "rb") as f:
def is_symbol_in_section(sym, sec):
sec_end = sec["Address"] + sec["Size"]
is_start_in_section = sec["Address"] <= sym["Value"] < sec_end
is_end_in_section = sym["Value"] + sym["Size"] <= sec_end
return is_start_in_section and is_end_in_section
for name, s in sorted(syms.items(), key=lambda s: s[1]["Value"]):
# TODO: binary search (bisect)
sec = [sec for sec in secs if is_symbol_in_section(s, sec)]
if len(sec) != 1:
error(f"failed to locate section for interval [{s['Value']:x}, {s['Value'] + s['Size']:x})")
sec = sec[0]
f.seek(sec["Off"])
data[name] = f.read(s["Size"])
return data
def collect_relocated_data(syms, bites, rels, ptr_size, reloc_types):
"""Identify relocations for each symbol"""
data = {}
for name, s in sorted(syms.items()):
b = bites.get(name)
assert b is not None
if s["Demangled Name"].startswith("typeinfo name"):
data[name] = [("byte", int(x)) for x in b]
continue
data[name] = []
for i in range(0, len(b), ptr_size):
val = int.from_bytes(b[i * ptr_size : (i + 1) * ptr_size], byteorder="little")
data[name].append(("offset", val))
start = s["Value"]
finish = start + s["Size"]
# TODO: binary search (bisect)
for rel in rels:
if rel["Type"] in reloc_types and start <= rel["Offset"] < finish:
i = (rel["Offset"] - start) // ptr_size
assert i < len(data[name])
data[name][i] = "reloc", rel
return data
def generate_vtables(cls_tables, cls_syms, cls_data):
"""Generate code for vtables"""
c_types = {"reloc": "const void *", "byte": "unsigned char", "offset": "size_t"}
ss = []
ss.append(
"""\
#ifdef __cplusplus
extern "C" {
#endif
"""
)
# Print externs
printed = set()
for name, data in sorted(cls_data.items()):
for typ, val in data:
if typ != "reloc":
continue
sym_name, addend = val["Symbol's Name + Addend"]
sym_name = re.sub(r"@.*", "", sym_name) # Can we pin version in C?
if sym_name not in cls_syms and sym_name not in printed:
ss.append(
f"""\
extern const char {sym_name}[];
"""
)
# Collect variable infos
code_info = {}
for name, s in sorted(cls_syms.items()):
data = cls_data[name]
if s["Demangled Name"].startswith("typeinfo name"):
declarator = "const unsigned char %s[]"
else:
field_types = (f"{c_types[typ]} field_{i};" for i, (typ, _) in enumerate(data))
declarator = "const struct { %s } %%s" % " ".join(
field_types
) # pylint: disable=C0209 # consider-using-f-string
vals = []
for typ, val in data:
if typ != "reloc":
vals.append(str(val) + "UL")
else:
sym_name, addend = val["Symbol's Name + Addend"]
sym_name = re.sub(r"@.*", "", sym_name) # Can we pin version in C?
vals.append(f"(const char *)&{sym_name} + {addend}")
code_info[name] = (declarator, "{ %s }" % ", ".join(vals)) # pylint: disable= C0209 # consider-using-f-string
# Print declarations
for name, (decl, _) in sorted(code_info.items()):
type_name = name + "_type"
type_decl = decl % type_name
ss.append(
f"""\
typedef {type_decl};
extern __attribute__((weak)) {type_name} {name};
"""
)
# Print definitions
for name, (_, init) in sorted(code_info.items()):
type_name = name + "_type"
ss.append(
f"""\
const {type_name} {name} = {init};
"""
)
ss.append(
"""\
#ifdef __cplusplus
} // extern "C"
#endif
"""
)
return "".join(ss)
def main():
"""Driver function"""
parser = argparse.ArgumentParser(
description="Generate wrappers for shared library functions.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=f"""\
Examples:
$ python3 {me} /usr/lib/x86_64-linux-gnu/libaccountsservice.so.0
Generating libaccountsservice.so.0.tramp.S...
Generating libaccountsservice.so.0.init.cpp...
""",
)
parser.add_argument("library", metavar="LIB", help="Library to be wrapped.")
parser.add_argument("--verbose", "-v", help="Print diagnostic info", action="count", default=0)
parser.add_argument(
"--dlopen-callback", help="Call user-provided custom callback to load library instead of dlopen", default=""
)
parser.add_argument("--library-load-name", help="Use custom name for dlopened library (default is LIB)")
parser.add_argument(
"--vtables", help="Intercept virtual tables (EXPERIMENTAL)", dest="vtables", action="store_true", default=False
)
parser.add_argument(
"--no-vtables", help="Do not intercept virtual tables (default)", dest="vtables", action="store_false"
)
parser.add_argument(
"--target",
help="Target platform triple e.g. x86_64-unknown-linux-gnu or arm-none-eabi "
"(atm x86_64, i[0-9]86, arm/armhf/armeabi, aarch64/armv8 "
"and e2k are supported)",
default=os.uname()[-1],
)
parser.add_argument(
"--symbol-list", help="Path to file with symbols that should be present in wrapper " "(all by default)"
)
parser.add_argument(
"--symbol-filter", help="Regular expression filter on symbols to be wrapped", default=""
)
parser.add_argument("--symbol-prefix", metavar="PFX", help="Prefix wrapper symbols with PFX", default="")
parser.add_argument("-q", "--quiet", help="Do not print progress info", action="store_true")
parser.add_argument("--outdir", "-o", help="Path to create wrapper at", default="./")
args = parser.parse_args()
input_name = args.library
verbose = args.verbose
dlopen_callback = args.dlopen_callback
load_name = args.library_load_name or os.path.basename(input_name)
if args.target.startswith("arm"):
target = "arm" # Handle armhf-..., armel-...
elif re.match(r"^i[0-9]86", args.target):
target = "i386"
else:
target = args.target.split("-")[0]
quiet = args.quiet
outdir = args.outdir
if args.symbol_list is None:
funs = None
else:
with open(args.symbol_list, "r") as f:
funs = []
for line in re.split(r"\r?\n", f.read()):
line = re.sub(r"#.*", "", line)
line = line.strip()
if line:
funs.append(line)
# Collect target info
target_dir = os.path.join(root, "arch", target)
if not os.path.exists(target_dir):
error(f"unknown architecture '{target}'")
cfg = configparser.ConfigParser(inline_comment_prefixes=";")
cfg.read(target_dir + "/config.ini")
ptr_size = int(cfg["Arch"]["PointerSize"])
symbol_reloc_types = set(re.split(r"\s*,\s*", cfg["Arch"]["SymbolReloc"]))
def is_exported(s):
return (
s["Bind"] != "LOCAL"
and s["Type"] != "NOTYPE"
and s["Ndx"] != "UND"
and s["Name"] not in ["", "_init", "_fini"]
)
syms = list(filter(is_exported, collect_syms(input_name)))
if args.symbol_filter:
pattern = re.compile(args.symbol_filter)
syms = [ s for s in syms if pattern.match(s["Name"]) ]
def is_data_symbol(s):
return (
s["Type"] == "OBJECT"
# Allow vtables if --vtables is on
and not (" for " in s["Demangled Name"] and args.vtables)
)
exported_data = [s["Name"] for s in syms if is_data_symbol(s)]
if exported_data:
# TODO: we can generate wrappers for const data without relocations (or only code relocations)
warn(f"library '{input_name}' contains data symbols which won't be intercepted: " + ", ".join(exported_data))
# Collect functions
# TODO: warn if user-specified functions are missing
orig_funs = filter(lambda s: s["Type"] == "FUNC", syms)
all_funs = set()
warn_versioned = False
for s in orig_funs:
if s["Version"] is not None:
# TODO: support versions
if not warn_versioned:
warn(f"library {input_name} contains versioned symbols which are NYI")
warn_versioned = True
if verbose:
print(f"Skipping versioned symbol {s['Name']}")
continue
all_funs.add(s["Name"])
if funs is None:
funs = sorted(list(all_funs))
if not funs and not quiet:
warn(f"no public functions were found in {input_name}")
else:
missing_funs = [name for name in funs if name not in all_funs]
if missing_funs:
warn("some user-specified functions are not present in library: " + ", ".join(missing_funs))
funs = [name for name in funs if name in all_funs]
if verbose:
print("Exported functions:")
for i, fun in enumerate(funs):
print(f" {i}: {fun}")
# Collect vtables
if args.vtables:
cls_tables = {}
cls_syms = {}
for s in syms:
m = re.match(r"^(vtable|typeinfo|typeinfo name) for (.*)", s["Demangled Name"])
if m is not None and is_exported(s):
typ, cls = m.groups()
name = s["Name"]
cls_tables.setdefault(cls, {})[typ] = name
cls_syms[name] = s
if verbose:
print("Exported classes:")
for cls, _ in sorted(cls_tables.items()):
print(f" {cls}")
secs = collect_sections(input_name)
if verbose:
print("Sections:")
for sec in secs:
print(f" {sec['Name']}: [{sec['Address']:x}, {sec['Address'] + sec['Size']:x}), " f"at {sec['Off']:x}")
bites = read_unrelocated_data(input_name, cls_syms, secs)
rels = collect_relocs(input_name)
if verbose:
print("Relocs:")
for rel in rels:
sym_add = rel["Symbol's Name + Addend"]
print(f" {rel['Offset']}: {sym_add}")
cls_data = collect_relocated_data(cls_syms, bites, rels, ptr_size, symbol_reloc_types)
if verbose:
print("Class data:")
for name, data in sorted(cls_data.items()):
demangled_name = cls_syms[name]["Demangled Name"]
print(f" {name} ({demangled_name}):")
for typ, val in data:
print(" " + str(val if typ != "reloc" else val["Symbol's Name + Addend"]))
# Generate assembly code
suffix = os.path.basename(load_name)
lib_suffix = re.sub(r"[^a-zA-Z_0-9]+", "_", suffix)
tramp_file = f"{suffix}.tramp.S"
with open(os.path.join(outdir, tramp_file), "w") as f:
if not quiet:
print(f"Generating {tramp_file}...")
with open(target_dir + "/table.S.tpl", "r") as t:
table_text = string.Template(t.read()).substitute(
lib_suffix=lib_suffix, table_size=ptr_size * (len(funs) + 1)
)
f.write(table_text)
with open(target_dir + "/trampoline.S.tpl", "r") as t:
tramp_tpl = string.Template(t.read())
for i, name in enumerate(funs):
tramp_text = tramp_tpl.substitute(
lib_suffix=lib_suffix, sym=args.symbol_prefix + name, offset=i * ptr_size, number=i
)
f.write(tramp_text)
# Generate C code
init_file = f"{suffix}.init.cpp"
with open(os.path.join(outdir, init_file), "w") as f:
if not quiet:
print(f"Generating {init_file}...")
with open(os.path.join(root, "arch/common/init.cpp.tpl"), "r") as t:
if funs:
sym_names = ",\n ".join(f'"{name}"' for name in funs) + ","
else:
sym_names = ""
init_text = string.Template(t.read()).substitute(
lib_suffix=lib_suffix,
load_name=load_name,
dlopen_callback=dlopen_callback,
has_dlopen_callback=int(bool(dlopen_callback)),
sym_names=sym_names,
)
f.write(init_text)
if args.vtables:
vtable_text = generate_vtables(cls_tables, cls_syms, cls_data)
f.write(vtable_text)
if __name__ == "__main__":
main()