[lld][WebAssembly] Initialize bss segments using memory.fill

Previously we were relying on the dynamic loader to take care of this
but it is simple and correct for us to do it here instead.

Now we initialize bss segments as part of `__wasm_init_memory` at the
same time we initialize passive segments.

In addition we extend the use of `__wasm_init_memory` outside of shared
memory situations.  Specifically it is now used to initialize bss
segments when the memory is imported.

Differential Revision: https://reviews.llvm.org/D112667
This commit is contained in:
Sam Clegg 2021-10-26 18:08:07 -07:00
parent b65f24a74c
commit 1eb79e732c
9 changed files with 249 additions and 119 deletions

View File

@ -187,6 +187,13 @@
; DIS-NEXT: i32.const 0
; DIS-NEXT: i32.const 20
; DIS-NEXT: memory.init 1, 0
; NOPIC-DIS-NEXT: [[PTR]].const 1060
; PIC-DIS-NEXT: [[PTR]].const 36
; PIC-DIS-NEXT: global.get 1
; PIC-DIS-NEXT: [[PTR]].add
; DIS-NEXT: i32.const 0
; DIS-NEXT: i32.const 10000
; DIS-NEXT: memory.fill 0
; NOPIC-DIS-NEXT: [[PTR]].const 11060
; PIC-DIS-NEXT: local.get 0

View File

@ -0,0 +1,72 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
# RUN: wasm-ld --experimental-pic -shared --shared-memory -o %t.so %t.o
# RUN: llvm-objdump -d --no-show-raw-insn --no-leading-addr %t.so | FileCheck %s
# RUN: obj2yaml %t.so | FileCheck %s --check-prefix=YAML
.section .bss.foo,"",@
.globl foo
.p2align 2
foo:
.int32 0
.size foo, 4
.section .data.bar,"",@
.globl bar
.p2align 2
bar:
.int32 42
.size bar, 4
.section .custom_section.target_features,"",@
.int8 2
.int8 43
.int8 7
.ascii "atomics"
.int8 43
.int8 11
.ascii "bulk-memory"
# Verify that there is only a single data segment and no bss
# in the binary:
# YAML: - Type: DATA{{$}}
# YAML-NEXT: Segments:
# YAML-NEXT: - SectionOffset: 3
# YAML-NEXT: InitFlags: 1
# YAML-NEXT: Content: 2A000000
# YAML-NEXT: - Type: CUSTOM
# CHECK: <__wasm_init_memory>:
# CHECK-NEXT: .local i32
# CHECK-NEXT: global.get 0
# CHECK-NEXT: i32.const 8
# CHECK-NEXT: i32.add
# CHECK-NEXT: local.set 0
# CHECK-NEXT: block
# CHECK-NEXT: block
# CHECK-NEXT: block
# CHECK-NEXT: local.get 0
# CHECK-NEXT: i32.const 0
# CHECK-NEXT: i32.const 1
# CHECK-NEXT: i32.atomic.rmw.cmpxchg 0
# CHECK-NEXT: br_table {0, 1, 2} # 1: down to label1
# CHECK-NEXT: # 2: down to label0
# CHECK-NEXT: end
# Regular data gets initialized with memory.init
# CHECK-NEXT: i32.const 0
# CHECK-NEXT: global.get 0
# CHECK-NEXT: i32.add
# CHECK-NEXT: i32.const 0
# CHECK-NEXT: i32.const 4
# CHECK-NEXT: memory.init 0, 0
# BSS gets initialized with memory.fill
# CHECK-NEXT: i32.const 4
# CHECK-NEXT: global.get 0
# CHECK-NEXT: i32.add
# CHECK-NEXT: i32.const 0
# CHECK-NEXT: i32.const 4
# CHECK-NEXT: memory.fill 0

View File

@ -87,7 +87,7 @@ tls3:
# CHECK-NEXT: Mutable: true
# CHECK-NEXT: InitExpr:
# CHECK-NEXT: Opcode: I32_CONST
# CHECK-NEXT: Value: 66576
# CHECK-NEXT: Value: 66592
# __tls_base
# CHECK-NEXT: - Index: 1

View File

@ -91,6 +91,10 @@ struct Configuration {
// for shared libraries (since they are always added to a dynamic offset at
// runtime).
uint32_t tableBase = 0;
// Will be set to true if bss data segments should be emitted. In most cases
// this is not necessary.
bool emitBssSegments = false;
};
// The only instance of Configuration struct.

View File

@ -133,10 +133,9 @@ void CodeSection::writeRelocations(raw_ostream &os) const {
void DataSection::finalizeContents() {
raw_string_ostream os(dataSectionHeader);
unsigned segmentCount =
std::count_if(segments.begin(), segments.end(),
[](OutputSegment *segment) { return !segment->isBss; });
unsigned segmentCount = std::count_if(
segments.begin(), segments.end(),
[](OutputSegment *segment) { return segment->requiredInBinary(); });
#ifndef NDEBUG
unsigned activeCount = std::count_if(
segments.begin(), segments.end(), [](OutputSegment *segment) {
@ -152,7 +151,7 @@ void DataSection::finalizeContents() {
bodySize = dataSectionHeader.size();
for (OutputSegment *segment : segments) {
if (segment->isBss)
if (!segment->requiredInBinary())
continue;
raw_string_ostream os(segment->header);
writeUleb128(os, segment->initFlags, "init flags");
@ -199,7 +198,7 @@ void DataSection::writeTo(uint8_t *buf) {
memcpy(buf, dataSectionHeader.data(), dataSectionHeader.size());
for (const OutputSegment *segment : segments) {
if (segment->isBss)
if (!segment->requiredInBinary())
continue;
// Write data segment header
uint8_t *segStart = buf + segment->sectionOffset;
@ -227,7 +226,7 @@ void DataSection::writeRelocations(raw_ostream &os) const {
bool DataSection::isNeeded() const {
for (const OutputSegment *seg : segments)
if (!seg->isBss)
if (seg->requiredInBinary())
return true;
return false;
}

View File

@ -24,6 +24,11 @@ public:
void addInputSegment(InputChunk *inSeg);
void finalizeInputSegments();
// Reports whether this segment has to be emitted into the output binary.
// Non-BSS segments always are. BSS segments are normally left out and are
// only written verbatim when config->emitBssSegments is set, i.e. when the
// memory is imported and memory.fill can't be used during startup (due to
// lack of the bulk memory feature).
bool requiredInBinary() const {
  if (!isBss)
    return true;
  return config->emitBssSegments;
}
bool isTLS() const { return name == ".tdata"; }

View File

@ -558,9 +558,10 @@ void ElemSection::writeBody() {
DataCountSection::DataCountSection(ArrayRef<OutputSegment *> segments)
: SyntheticSection(llvm::wasm::WASM_SEC_DATACOUNT),
numSegments(std::count_if(
segments.begin(), segments.end(),
[](OutputSegment *const segment) { return !segment->isBss; })) {}
numSegments(std::count_if(segments.begin(), segments.end(),
[](OutputSegment *const segment) {
return segment->requiredInBinary();
})) {}
void DataCountSection::writeBody() {
writeUleb128(bodyOutputStream, numSegments, "data count");
@ -716,7 +717,7 @@ unsigned NameSection::numNamedDataSegments() const {
unsigned numNames = 0;
for (const OutputSegment *s : segments)
if (!s->name.empty() && !s->isBss)
if (!s->name.empty() && s->requiredInBinary())
++numNames;
return numNames;
@ -789,7 +790,7 @@ void NameSection::writeBody() {
writeUleb128(sub.os, count, "name count");
for (OutputSegment *s : segments) {
if (!s->name.empty() && !s->isBss) {
if (!s->name.empty() && s->requiredInBinary()) {
writeUleb128(sub.os, s->index, "global index");
writeStr(sub.os, s->name, "segment name");
}

View File

@ -73,6 +73,10 @@ private:
void populateSymtab();
void populateProducers();
void populateTargetFeatures();
// populateTargetFeatures happens early on so some checks are delayed
// until imports and exports are finalized.  These are run instead
// in checkImportExportTargetFeatures
void checkImportExportTargetFeatures();
void calculateInitFunctions();
void calculateImports();
void calculateExports();
@ -485,25 +489,6 @@ void Writer::populateTargetFeatures() {
if (!config->checkFeatures)
return;
if (!config->relocatable && allowed.count("mutable-globals") == 0) {
for (const Symbol *sym : out.importSec->importedSymbols) {
if (auto *global = dyn_cast<GlobalSymbol>(sym)) {
if (global->getGlobalType()->Mutable) {
error(Twine("mutable global imported but 'mutable-globals' feature "
"not present in inputs: `") +
toString(*sym) + "`. Use --no-check-features to suppress.");
}
}
}
for (const Symbol *sym : out.exportSec->exportedSymbols) {
if (isa<GlobalSymbol>(sym)) {
error(Twine("mutable global exported but 'mutable-globals' feature "
"not present in inputs: `") +
toString(*sym) + "`. Use --no-check-features to suppress.");
}
}
}
if (config->sharedMemory) {
if (disallowed.count("shared-mem"))
error("--shared-memory is disallowed by " + disallowed["shared-mem"] +
@ -552,6 +537,37 @@ void Writer::populateTargetFeatures() {
". Use --no-check-features to suppress.");
}
}
// Normally we don't include bss segments in the binary.  In particular if
// memory is not being imported then we can assume it's zero initialized.
// In the case the memory is imported, and we can use the memory.fill
// instruction, then we can also avoid including the segments.
if (config->importMemory && !allowed.count("bulk-memory"))
config->emitBssSegments = true;
}
// Performs the target-feature checks that need the final import and export
// lists: without the 'mutable-globals' feature, a mutable global must not be
// imported and a global must not be exported. Each offending symbol is
// reported through error().
void Writer::checkImportExportTargetFeatures() {
  // Nothing to verify for relocatable output or when feature checking is
  // turned off.
  if (config->relocatable || !config->checkFeatures)
    return;

  // If 'mutable-globals' is recorded in the target features section there is
  // nothing to diagnose.
  const bool hasMutableGlobals =
      out.targetFeaturesSec->features.count("mutable-globals") != 0;
  if (hasMutableGlobals)
    return;

  // Imports: only global symbols whose type is mutable are rejected.
  for (const Symbol *imported : out.importSec->importedSymbols) {
    auto *global = dyn_cast<GlobalSymbol>(imported);
    if (!global || !global->getGlobalType()->Mutable)
      continue;
    error(Twine("mutable global imported but 'mutable-globals' feature "
                "not present in inputs: `") +
          toString(*imported) + "`. Use --no-check-features to suppress.");
  }

  // Exports: every exported global symbol is rejected.
  for (const Symbol *exported : out.exportSec->exportedSymbols) {
    if (!isa<GlobalSymbol>(exported))
      continue;
    error(Twine("mutable global exported but 'mutable-globals' feature "
                "not present in inputs: `") +
          toString(*exported) + "`. Use --no-check-features to suppress.");
  }
}
static bool shouldImport(Symbol *sym) {
@ -851,11 +867,7 @@ OutputSegment *Writer::createOutputSegment(StringRef name) {
OutputSegment *s = make<OutputSegment>(name);
if (config->sharedMemory)
s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE;
// Exported memories are guaranteed to be zero-initialized, so no need
// to emit data segments for bss sections.
// TODO: consider initializing bss sections with memory.fill
// instructions when memory is imported and bulk-memory is available.
if (!config->importMemory && !config->relocatable && name.startswith(".bss"))
if (!config->relocatable && name.startswith(".bss"))
s->isBss = true;
segments.push_back(s);
return s;
@ -951,8 +963,14 @@ static void createFunction(DefinedFunction *func, StringRef bodyContent) {
}
bool Writer::needsPassiveInitialization(const OutputSegment *segment) {
return segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE &&
!segment->isTLS() && !segment->isBss;
// TLS segments are initialized separately
if (segment->isTLS())
return false;
// If the bulk memory feature is supported then we can perform bss
// initialization (via memory.fill) during `__wasm_init_memory`.
if (config->importMemory && !segment->requiredInBinary())
return true;
return segment->initFlags & WASM_DATA_SEGMENT_IS_PASSIVE;
}
bool Writer::hasPassiveInitializedSegments() {
@ -970,7 +988,9 @@ void Writer::createSyntheticInitFunctions() {
// Passive segments are used to avoid memory being reinitialized on each
// thread's instantiation. These passive segments are initialized and
// dropped in __wasm_init_memory, which is registered as the start function
if (config->sharedMemory && hasPassiveInitializedSegments()) {
// We also initialize bss segments (using memory.fill) as part of this
// function.
if (hasPassiveInitializedSegments()) {
WasmSym::initMemory = symtab->addSyntheticFunction(
"__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN,
make<SyntheticFunction>(nullSignature, "__wasm_init_memory"));
@ -1012,9 +1032,12 @@ void Writer::createSyntheticInitFunctions() {
void Writer::createInitMemoryFunction() {
LLVM_DEBUG(dbgs() << "createInitMemoryFunction\n");
assert(WasmSym::initMemory);
assert(WasmSym::initMemoryFlag);
assert(hasPassiveInitializedSegments());
uint64_t flagAddress = WasmSym::initMemoryFlag->getVA();
uint64_t flagAddress;
if (config->sharedMemory) {
assert(WasmSym::initMemoryFlag);
flagAddress = WasmSym::initMemoryFlag->getVA();
}
bool is64 = config->is64.getValueOr(false);
std::string bodyContent;
{
@ -1070,21 +1093,6 @@ void Writer::createInitMemoryFunction() {
// (i32.const $__init_memory_flag)
// (i32.const 1)
// With PIC code we cache the flag address in local 0
if (config->isPic) {
writeUleb128(os, 1, "num local decls");
writeUleb128(os, 1, "local count");
writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type");
writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
writePtrConst(os, flagAddress, is64, "flag address");
writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add");
writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set");
writeUleb128(os, 0, "local 0");
} else {
writeUleb128(os, 0, "num locals");
}
auto writeGetFlagAddress = [&]() {
if (config->isPic) {
writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get");
@ -1094,34 +1102,57 @@ void Writer::createInitMemoryFunction() {
}
};
// Set up destination blocks
writeU8(os, WASM_OPCODE_BLOCK, "block $drop");
writeU8(os, WASM_TYPE_NORESULT, "block type");
writeU8(os, WASM_OPCODE_BLOCK, "block $wait");
writeU8(os, WASM_TYPE_NORESULT, "block type");
writeU8(os, WASM_OPCODE_BLOCK, "block $init");
writeU8(os, WASM_TYPE_NORESULT, "block type");
if (config->sharedMemory) {
// With PIC code we cache the flag address in local 0
if (config->isPic) {
writeUleb128(os, 1, "num local decls");
writeUleb128(os, 1, "local count");
writeU8(os, is64 ? WASM_TYPE_I64 : WASM_TYPE_I32, "address type");
writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
writeUleb128(os, WasmSym::memoryBase->getGlobalIndex(), "memory_base");
writePtrConst(os, flagAddress, is64, "flag address");
writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD, "add");
writeU8(os, WASM_OPCODE_LOCAL_SET, "local.set");
writeUleb128(os, 0, "local 0");
} else {
writeUleb128(os, 0, "num locals");
}
// Atomically check whether we win the race.
writeGetFlagAddress();
writeI32Const(os, 0, "expected flag value");
writeI32Const(os, 1, "new flag value");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg");
writeMemArg(os, 2, 0);
// Set up destination blocks
writeU8(os, WASM_OPCODE_BLOCK, "block $drop");
writeU8(os, WASM_TYPE_NORESULT, "block type");
writeU8(os, WASM_OPCODE_BLOCK, "block $wait");
writeU8(os, WASM_TYPE_NORESULT, "block type");
writeU8(os, WASM_OPCODE_BLOCK, "block $init");
writeU8(os, WASM_TYPE_NORESULT, "block type");
// Based on the value, decide what to do next.
writeU8(os, WASM_OPCODE_BR_TABLE, "br_table");
writeUleb128(os, 2, "label vector length");
writeUleb128(os, 0, "label $init");
writeUleb128(os, 1, "label $wait");
writeUleb128(os, 2, "default label $drop");
// Atomically check whether we win the race.
writeGetFlagAddress();
writeI32Const(os, 0, "expected flag value");
writeI32Const(os, 1, "new flag value");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_I32_RMW_CMPXCHG, "i32.atomic.rmw.cmpxchg");
writeMemArg(os, 2, 0);
// Based on the value, decide what to do next.
writeU8(os, WASM_OPCODE_BR_TABLE, "br_table");
writeUleb128(os, 2, "label vector length");
writeUleb128(os, 0, "label $init");
writeUleb128(os, 1, "label $wait");
writeUleb128(os, 2, "default label $drop");
// Initialize passive data segments
writeU8(os, WASM_OPCODE_END, "end $init");
} else {
writeUleb128(os, 0, "num local decls");
}
// Initialize passive data segments
writeU8(os, WASM_OPCODE_END, "end $init");
for (const OutputSegment *s : segments) {
if (needsPassiveInitialization(s)) {
// destination address
// For passive BSS segments we can simply issue a memory.fill(0).
// For non-BSS segments we do a memory.init.  Both these
// instructions take as their first argument the destination
// address.
writePtrConst(os, s->startVA, is64, "destination address");
if (config->isPic) {
writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET");
@ -1130,52 +1161,60 @@ void Writer::createInitMemoryFunction() {
writeU8(os, is64 ? WASM_OPCODE_I64_ADD : WASM_OPCODE_I32_ADD,
"i32.add");
}
// source segment offset
writeI32Const(os, 0, "segment offset");
// memory region size
writeI32Const(os, s->size, "memory region size");
// memory.init instruction
writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init");
writeUleb128(os, s->index, "segment index immediate");
writeU8(os, 0, "memory index immediate");
if (s->isBss) {
writeI32Const(os, 0, "fill value");
writeI32Const(os, s->size, "memory region size");
writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
writeUleb128(os, WASM_OPCODE_MEMORY_FILL, "memory.fill");
writeU8(os, 0, "memory index immediate");
} else {
writeI32Const(os, 0, "source segment offset");
writeI32Const(os, s->size, "memory region size");
writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
writeUleb128(os, WASM_OPCODE_MEMORY_INIT, "memory.init");
writeUleb128(os, s->index, "segment index immediate");
writeU8(os, 0, "memory index immediate");
}
}
}
// Set flag to 2 to mark end of initialization
writeGetFlagAddress();
writeI32Const(os, 2, "flag value");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store");
writeMemArg(os, 2, 0);
if (config->sharedMemory) {
// Set flag to 2 to mark end of initialization
writeGetFlagAddress();
writeI32Const(os, 2, "flag value");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_I32_ATOMIC_STORE, "i32.atomic.store");
writeMemArg(os, 2, 0);
// Notify any waiters that memory initialization is complete
writeGetFlagAddress();
writeI32Const(os, -1, "number of waiters");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify");
writeMemArg(os, 2, 0);
writeU8(os, WASM_OPCODE_DROP, "drop");
// Notify any waiters that memory initialization is complete
writeGetFlagAddress();
writeI32Const(os, -1, "number of waiters");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_ATOMIC_NOTIFY, "atomic.notify");
writeMemArg(os, 2, 0);
writeU8(os, WASM_OPCODE_DROP, "drop");
// Branch to drop the segments
writeU8(os, WASM_OPCODE_BR, "br");
writeUleb128(os, 1, "label $drop");
// Branch to drop the segments
writeU8(os, WASM_OPCODE_BR, "br");
writeUleb128(os, 1, "label $drop");
// Wait for the winning thread to initialize memory
writeU8(os, WASM_OPCODE_END, "end $wait");
writeGetFlagAddress();
writeI32Const(os, 1, "expected flag value");
writeI64Const(os, -1, "timeout");
// Wait for the winning thread to initialize memory
writeU8(os, WASM_OPCODE_END, "end $wait");
writeGetFlagAddress();
writeI32Const(os, 1, "expected flag value");
writeI64Const(os, -1, "timeout");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait");
writeMemArg(os, 2, 0);
writeU8(os, WASM_OPCODE_DROP, "drop");
writeU8(os, WASM_OPCODE_ATOMICS_PREFIX, "atomics prefix");
writeUleb128(os, WASM_OPCODE_I32_ATOMIC_WAIT, "i32.atomic.wait");
writeMemArg(os, 2, 0);
writeU8(os, WASM_OPCODE_DROP, "drop");
// Unconditionally drop passive data segments
writeU8(os, WASM_OPCODE_END, "end $drop");
}
// Unconditionally drop passive data segments
writeU8(os, WASM_OPCODE_END, "end $drop");
for (const OutputSegment *s : segments) {
if (needsPassiveInitialization(s)) {
if (needsPassiveInitialization(s) && !s->isBss) {
// data.drop instruction
writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix");
writeUleb128(os, WASM_OPCODE_DATA_DROP, "data.drop");
@ -1502,6 +1541,8 @@ void Writer::run() {
scanRelocations();
log("-- finalizeIndirectFunctionTable");
finalizeIndirectFunctionTable();
log("-- populateTargetFeatures");
populateTargetFeatures();
log("-- createSyntheticInitFunctions");
createSyntheticInitFunctions();
log("-- assignIndexes");
@ -1550,8 +1591,8 @@ void Writer::run() {
calculateCustomSections();
log("-- populateSymtab");
populateSymtab();
log("-- populateTargetFeatures");
populateTargetFeatures();
log("-- checkImportExportTargetFeatures");
checkImportExportTargetFeatures();
log("-- addSections");
addSections();

View File

@ -298,6 +298,7 @@ enum : unsigned {
WASM_OPCODE_DROP = 0x1a,
WASM_OPCODE_MISC_PREFIX = 0xfc,
WASM_OPCODE_MEMORY_INIT = 0x08,
WASM_OPCODE_MEMORY_FILL = 0x0b,
WASM_OPCODE_DATA_DROP = 0x09,
WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
WASM_OPCODE_ATOMIC_NOTIFY = 0x00,