[WebAssembly] GC constructor functions in otherwise unused archive objects

This allows `__wasilibc_populate_libpreopen` to be GC'd in more cases
where it isn't needed, including when linked from Rust's libstd.

Differential Revision: https://reviews.llvm.org/D85062
This commit is contained in:
Dan Gohman 2020-09-30 20:00:04 -07:00
parent d938e6e3c2
commit 950ae43091
12 changed files with 170 additions and 22 deletions

View File

@ -0,0 +1,15 @@
# Test input: one object that defines both an ordinary function (`def`) and
# a function listed in .init_array (`test_ctor`).

# `def`: empty global function, placed in its own section.
.section .text.def,"",@
.globl def
def:
.functype def () -> ()
end_function
# `test_ctor`: empty global function, placed in its own section.
.section .text.test_ctor,"",@
.globl test_ctor
test_ctor:
.functype test_ctor () -> ()
end_function
# Emit the address of `test_ctor` into .init_array; the accompanying tests
# treat this entry as the object's constructor.
.section .init_array,"",@
.p2align 2
.int32 test_ctor

View File

@ -0,0 +1,14 @@
# Test input: a library object with one self-contained function and one
# function that references the externally defined `def`.

# `lib_func`: empty global function with no references to other symbols.
.section .text.lib_func,"",@
.globl lib_func
lib_func:
.functype lib_func () -> ()
end_function
# `unused_lib_func`: global function whose only instruction is a call to
# `def`, so this object carries an undefined reference to `def`.
.section .text.unused_lib_func,"",@
.globl unused_lib_func
unused_lib_func:
.functype unused_lib_func () -> ()
call def
end_function
# Signature of `def`, which is called above but not defined in this file.
.functype def () -> ()

View File

@ -0,0 +1,21 @@
# Like Inputs/ctor-setup.s, except it calls `def` instead of `lib_func`,
# so it pulls in the .o file containing `test_ctor`.

# `_start`: empty global entry function.
.section .text._start,"",@
.globl _start
_start:
.functype _start () -> ()
end_function
# `setup`: global function whose only instruction is a call to `def`.
.section .text.setup,"",@
.globl setup
setup:
.functype setup () -> ()
call def
end_function
# List `setup` in .init_array so that it acts as this object's constructor.
.section .init_array,"",@
.p2align 2
.int32 setup
# Signature of `def`, which is called above but not defined in this file.
.functype def () -> ()

View File

@ -0,0 +1,19 @@
# Like Inputs/ctor-start.s, except it calls `lib_func` from a ctor
# instead of from `_start`.

# `_start`: empty global entry function.
.globl _start
_start:
.functype _start () -> ()
end_function
# `setup`: global function whose only instruction is a call to `lib_func`.
.globl setup
setup:
.functype setup () -> ()
call lib_func
end_function
# List `setup` in .init_array so that it acts as this object's constructor.
.section .init_array,"",@
.p2align 2
.int32 setup
# Signature of `lib_func`, which is called above but not defined in this file.
.functype lib_func () -> ()

View File

@ -0,0 +1,7 @@
# Test input: `_start` directly calls `lib_func`, which must be supplied by
# another object at link time.
.globl _start
_start:
.functype _start () -> ()
call lib_func
end_function
# Signature of `lib_func`, which is called above but not defined in this file.
.functype lib_func () -> ()

View File

@ -0,0 +1,12 @@
; Like ctor-gc.test, but the main object calls a function from its
; constructor, which shouldn't matter; `test_ctor` shouldn't be pulled in.
;
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup.s -o %t.setup.o
; RUN: rm -f %t.lib.a
; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
; RUN: wasm-ld %t.setup.o %t.lib.a -o %t.wasm
; RUN: obj2yaml %t.wasm | FileCheck %s
; CHECK-NOT: Name: test_ctor

View File

@ -0,0 +1,12 @@
; Verify that constructors from a .o file that the link initially depends on,
; but that doesn't ultimately contribute to the final link, are not included.
;
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-start.s -o %t.start.o
; RUN: rm -f %t.lib.a
; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
; RUN: wasm-ld %t.start.o %t.lib.a -o %t.wasm
; RUN: obj2yaml %t.wasm | FileCheck %s
; CHECK-NOT: __wasm_call_ctors

View File

@ -0,0 +1,12 @@
; Like ctor-gc-setup.test, but the constructor calls `def` instead, so it does
; pull in the object containing `test_ctor`, so `test_ctor` is linked in.
;
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup-call-def.s -o %t.setup-call-def.o
; RUN: rm -f %t.lib.a
; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
; RUN: wasm-ld %t.setup-call-def.o %t.lib.a -o %t.wasm
; RUN: obj2yaml %t.wasm | FileCheck %s
; CHECK: Name: test_ctor

View File

@ -60,8 +60,14 @@ public:
MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
// An InputFile is considered live if any of the symbols defined by it
// are live.
void markLive() { live = true; }
bool isLive() const { return live; }
protected:
InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
InputFile(Kind k, MemoryBufferRef m)
: mb(m), fileKind(k), live(!config->gcSections) {}
MemoryBufferRef mb;
// List of all symbols referenced or defined by this file.
@ -69,6 +75,7 @@ protected:
private:
const Kind fileKind;
bool live;
};
// .a file (ar archive)
@ -92,6 +99,10 @@ public:
// Builds an object file over buffer `m`. `archiveName` names the archive the
// object was extracted from and is empty for a standalone object.
explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
    : InputFile(ObjectKind, m) {
  const bool fromArchive = !archiveName.empty();
  this->archiveName = archiveName.str();
  // Only objects that are not part of an archive are eagerly linked, and
  // only those are marked live here.
  if (fromArchive)
    return;
  markLive();
}
static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
@ -156,6 +167,10 @@ public:
// Builds a bitcode file over buffer `m`. `archiveName` names the archive the
// file was extracted from and is empty for a standalone file.
explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName)
    : InputFile(BitcodeKind, m) {
  const bool fromArchive = !archiveName.empty();
  this->archiveName = archiveName.str();
  // Only files that are not part of an archive are eagerly linked, and only
  // those are marked live here.
  if (fromArchive)
    return;
  markLive();
}
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }

View File

@ -42,6 +42,7 @@ public:
private:
void enqueue(Symbol *sym);
// Enqueues every non-discarded init function (ctor) recorded in the linking
// data of `obj`.
void enqueueInitFunctions(const ObjFile *obj);
void markSymbol(Symbol *sym);
void mark();
bool isCallCtorsLive();
@ -56,11 +57,35 @@ void MarkLive::enqueue(Symbol *sym) {
if (!sym || sym->isLive())
return;
LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
InputFile *file = sym->getFile();
bool needInitFunctions = file && !file->isLive() && sym->isDefined();
sym->markLive();
// Mark ctor functions in the object that defines this symbol live.
// The ctor functions are all referenced by the synthetic callCtors
// function. However, this function does not contain relocations so we
// have to manually mark the ctors as live.
if (needInitFunctions)
enqueueInitFunctions(cast<ObjFile>(file));
if (InputChunk *chunk = sym->getChunk())
queue.push_back(chunk);
}
// Enqueues the init functions (ctors) of `obj`. The synthetic callCtors
// function references every ctor, but it carries no relocations, so the
// ctors have to be marked live by hand. Discarded ctors are skipped.
void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
  const WasmLinkingData &linking = obj->getWasmObj()->linkingData();
  for (const WasmInitFunc &initFunc : linking.InitFunctions) {
    FunctionSymbol *ctor = obj->getFunctionSymbol(initFunc.Symbol);
    if (ctor->isDiscarded())
      continue;
    enqueue(ctor);
  }
}
void MarkLive::run() {
// Add GC root symbols.
if (!config->entry.empty())
@ -75,31 +100,24 @@ void MarkLive::run() {
if (Symbol *callDtors = WasmSym::callDtors)
enqueue(callDtors);
// The ctor functions are all referenced by the synthetic callCtors
// function. However, this function does not contain relocations so we
// have to manually mark the ctors as live.
for (const ObjFile *obj : symtab->objectFiles) {
const WasmLinkingData &l = obj->getWasmObj()->linkingData();
for (const WasmInitFunc &f : l.InitFunctions) {
auto *initSym = obj->getFunctionSymbol(f.Symbol);
if (!initSym->isDiscarded())
enqueue(initSym);
}
}
// In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`.
if (config->isPic)
enqueue(WasmSym::applyRelocs);
// If we have any non-discarded init functions, mark `__wasm_call_ctors` as
// live so that we assign it an index and call it.
if (isCallCtorsLive())
enqueue(WasmSym::callCtors);
if (config->sharedMemory && !config->shared)
enqueue(WasmSym::initMemory);
// Enqueue constructors in objects explicitly live from the command-line.
for (const ObjFile *obj : symtab->objectFiles)
if (obj->isLive())
enqueueInitFunctions(obj);
mark();
// If we have any non-discarded init functions, mark `__wasm_call_ctors` as
// live so that we assign it an index and call it.
if (isCallCtorsLive())
WasmSym::callCtors->markLive();
}
void MarkLive::mark() {
@ -181,9 +199,11 @@ bool MarkLive::isCallCtorsLive() {
// it can call them.
for (const ObjFile *file : symtab->objectFiles) {
const WasmLinkingData &l = file->getWasmObj()->linkingData();
for (const WasmInitFunc &f : l.InitFunctions)
if (!file->getFunctionSymbol(f.Symbol)->isDiscarded())
for (const WasmInitFunc &f : l.InitFunctions) {
auto *sym = file->getFunctionSymbol(f.Symbol);
if (!sym->isDiscarded() && sym->isLive())
return true;
}
}
return false;

View File

@ -132,6 +132,8 @@ bool Symbol::isLive() const {
void Symbol::markLive() {
assert(!isDiscarded());
if (file != NULL)
file->markLive();
if (auto *g = dyn_cast<DefinedGlobal>(this))
g->global->live = true;
if (auto *e = dyn_cast<DefinedEvent>(this))

View File

@ -1112,9 +1112,8 @@ void Writer::calculateInitFunctions() {
for (const WasmInitFunc &f : l.InitFunctions) {
FunctionSymbol *sym = file->getFunctionSymbol(f.Symbol);
// comdat exclusions can cause init functions to be discarded.
if (sym->isDiscarded())
if (sym->isDiscarded() || !sym->isLive())
continue;
assert(sym->isLive());
if (sym->signature->Params.size() != 0)
error("constructor functions cannot take arguments: " + toString(*sym));
LLVM_DEBUG(dbgs() << "initFunctions: " << toString(*sym) << "\n");