[lld][WebAssembly] Initial support merging string data

This change adds support for a new WASM_SEG_FLAG_STRINGS flag in
the object format which works in a similar fashion to SHF_STRINGS
in the ELF world.

Unlike the ELF linker this support is currently limited:
- No support for SHF_MERGE (non-string merging)
- Always do full tail merging ("lo" can be merged with "hello")
- Only support single byte strings (p2align 0)

Like the ELF linker merging is only performed at `-O1` and above.

This fixes part of https://bugs.llvm.org/show_bug.cgi?id=48828,
although crucially it does not currently support debug sections
because they are not represented by data segments (they are custom
sections)

Differential Revision: https://reviews.llvm.org/D97657
This commit is contained in:
Sam Clegg 2021-02-26 16:09:32 -08:00
parent 85af8a8c1b
commit 5000a1b4b9
25 changed files with 560 additions and 76 deletions

View File

@ -0,0 +1,65 @@
// RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
// RUN: wasm-ld -O2 %t.o -o %t.wasm --no-gc-sections --no-entry
// RUN: obj2yaml %t.wasm | FileCheck %s --check-prefixes=COMMON,MERGE
// RUN: wasm-ld -O0 %t.o -o %t2.wasm --no-gc-sections --no-entry
// RUN: obj2yaml %t2.wasm | FileCheck --check-prefixes=COMMON,NOMERGE %s
.section .rodata1,"S",@
.asciz "abc"
foo:
.ascii "a"
.size foo, 1
bar:
.asciz "bc"
.asciz "bc"
.size bar, 4
.globl foo
.globl bar
.export_name foo, foo
.export_name bar, bar
// COMMON: - Type: GLOBAL
// COMMON-NEXT: Globals:
// COMMON-NEXT: - Index: 0
// COMMON-NEXT: Type: I32
// COMMON-NEXT: Mutable: true
// COMMON-NEXT: InitExpr:
// COMMON-NEXT: Opcode: I32_CONST
// COMMON-NEXT: Value: 66576
// COMMON-NEXT: - Index: 1
// COMMON-NEXT: Type: I32
// COMMON-NEXT: Mutable: false
// COMMON-NEXT: InitExpr:
// COMMON-NEXT: Opcode: I32_CONST
// MERGE-NEXT: Value: 1024
// NOMERGE-NEXT: Value: 1028
// COMMON-NEXT: - Index: 2
// COMMON-NEXT: Type: I32
// COMMON-NEXT: Mutable: false
// COMMON-NEXT: InitExpr:
// COMMON-NEXT: Opcode: I32_CONST
// MERGE-NEXT: Value: 1025
// NOMERGE-NEXT: Value: 1029
// COMMON-NEXT: - Type: EXPORT
// COMMON-NEXT: Exports:
// COMMON-NEXT: - Name: memory
// COMMON-NEXT: Kind: MEMORY
// COMMON-NEXT: Index: 0
// COMMON-NEXT: - Name: foo
// COMMON-NEXT: Kind: GLOBAL
// COMMON-NEXT: Index: 1
// COMMON-NEXT: - Name: bar
// COMMON-NEXT: Kind: GLOBAL
// COMMON-NEXT: Index: 2
//
// COMMON: - Type: DATA
// COMMON-NEXT: Segments:
// COMMON-NEXT: - SectionOffset: 7
// COMMON-NEXT: InitFlags: 0
// COMMON-NEXT: Offset:
// COMMON-NEXT: Opcode: I32_CONST
// COMMON-NEXT: Value: 1024
// MERGE-NEXT: Content: '61626300'
// NOMERGE-NEXT: Content: '6162630061626300626300'

View File

@ -10,6 +10,7 @@ add_lld_library(lldWasm
MapFile.cpp
MarkLive.cpp
OutputSections.cpp
OutputSegment.cpp
Relocations.cpp
SymbolTable.cpp
Symbols.cpp

View File

@ -385,7 +385,7 @@ static void readConfigs(opt::InputArgList &args) {
LLVM_ENABLE_NEW_PASS_MANAGER);
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->mapFile = args.getLastArgValue(OPT_Map);
config->optimize = args::getInteger(args, OPT_O, 0);
config->optimize = args::getInteger(args, OPT_O, 1);
config->outputFile = args.getLastArgValue(OPT_o);
config->relocatable = args.hasArg(OPT_relocatable);
config->gcSections =
@ -795,6 +795,18 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
symtab->wrap(w.sym, w.real, w.wrap);
}
static void splitSections() {
// splitIntoPieces needs to be called on each MergeInputSection
// before calling finalizeContents().
LLVM_DEBUG(llvm::dbgs() << "splitSections\n");
parallelForEach(symtab->objectFiles, [](ObjFile *file) {
for (InputSegment *seg : file->segments) {
if (auto *s = dyn_cast<MergeInputSegment>(seg))
s->splitIntoPieces();
}
});
}
void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
WasmOptTable parser;
opt::InputArgList args = parser.parse(argsArr.slice(1));
@ -981,6 +993,10 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
if (errorCount())
return;
// Split WASM_SEG_FLAG_STRINGS sections into pieces in preparation for garbage
// collection.
splitSections();
// Do size optimizations: garbage collection
markLive();

View File

@ -13,6 +13,7 @@
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/LLVM.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/xxhash.h"
#define DEBUG_TYPE "lld"
@ -126,6 +127,10 @@ void InputChunk::writeTo(uint8_t *buf) const {
memcpy(buf + outSecOff, data().data(), data().size());
// Apply relocations
relocate(buf + outSecOff);
}
void InputChunk::relocate(uint8_t *buf) const {
if (relocations.empty())
return;
@ -135,11 +140,11 @@ void InputChunk::writeTo(uint8_t *buf) const {
LLVM_DEBUG(dbgs() << "applying relocations: " << toString(this)
<< " count=" << relocations.size() << "\n");
int32_t off = outSecOff - getInputSectionOffset();
int32_t inputSectionOffset = getInputSectionOffset();
auto tombstone = getTombstone();
for (const WasmRelocation &rel : relocations) {
uint8_t *loc = buf + rel.Offset + off;
uint8_t *loc = buf + rel.Offset - inputSectionOffset;
auto value = file->calcNewValue(rel, tombstone, this);
LLVM_DEBUG(dbgs() << "apply reloc: type=" << relocTypeToString(rel.Type));
if (rel.Type != R_WASM_TYPE_INDEX_LEB)
@ -357,8 +362,20 @@ void InputFunction::writeTo(uint8_t *buf) const {
LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n");
}
uint64_t InputSegment::getOffset(uint64_t offset) const {
if (const MergeInputSegment *ms = dyn_cast<MergeInputSegment>(this)) {
LLVM_DEBUG(dbgs() << "getOffset(merged): " << getName() << "\n");
LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
<< "\n");
assert(ms->parent);
return ms->parent->getOffset(ms->getParentOffset(offset));
}
return outputSegmentOffset + offset;
}
uint64_t InputSegment::getVA(uint64_t offset) const {
return outputSeg->startVA + outputSegmentOffset + offset;
return (outputSeg ? outputSeg->startVA : 0) + getOffset(offset);
}
// Generate code to apply relocations to the data section at runtime.
@ -431,6 +448,93 @@ void InputSegment::generateRelocationCode(raw_ostream &os) const {
}
}
// Splits the contents of a WASM_SEG_FLAG_STRINGS segment, which is a sequence
// of null-terminated strings, into one piece per string. Each piece records
// its starting offset and a hash of the string including its terminator.
// Reports a fatal error if the final string is not null terminated.
void MergeInputSegment::splitStrings(ArrayRef<uint8_t> data) {
  LLVM_DEBUG(llvm::dbgs() << "splitStrings\n");
  const StringRef contents = toStringRef(data);
  const size_t total = contents.size();
  size_t start = 0;
  while (start != total) {
    size_t terminator = contents.find('\0', start);
    if (terminator == StringRef::npos)
      fatal(toString(this) + ": string is not null terminated");
    // The piece covers [start, next), i.e. the string plus its terminator.
    size_t next = terminator + 1;
    pieces.emplace_back(start, xxHash64(contents.slice(start, next)), true);
    start = next;
  }
}
// Called once the complete list of input segments to link is known. Breaks
// this segment's contents into small pieces for further processing.
//
// This runs inside parallelForEach, so it must be thread-safe (i.e. no memory
// allocation from the pools).
void MergeInputSegment::splitIntoPieces() {
  // Splitting must happen exactly once per segment.
  assert(pieces.empty());
  // Only WASM_SEG_FLAG_STRINGS segments are splittable as of now; other kinds
  // of splitting could be added in the future (see ELF's splitIntoPieces).
  assert(segment->Data.LinkingFlags & WASM_SEG_FLAG_STRINGS);
  splitStrings(data());
}
// Returns the piece that contains the given input offset. Reports a fatal
// error if the offset lies beyond the end of the segment data.
SegmentPiece *MergeInputSegment::getSegmentPiece(uint64_t offset) {
  if (offset >= this->data().size())
    fatal(toString(this) + ": offset is outside the section");

  // The offset may point into the middle of a piece, so binary search the
  // piece vector for the first piece that starts after the offset; the piece
  // just before that one is the piece containing it.
  auto firstAfter = partition_point(
      pieces, [=](SegmentPiece piece) { return piece.inputOff <= offset; });
  return &*(firstAfter - 1);
}
// Returns the offset in an output section for a given input offset.
// Because contents of a mergeable section is not contiguous in output,
// it is not just an addition to a base output offset.
uint64_t MergeInputSegment::getParentOffset(uint64_t offset) const {
// If Offset is not at beginning of a section piece, it is not in the map.
// In that case we need to search from the original section piece vector.
const SegmentPiece *piece = getSegmentPiece(offset);
uint64_t addend = offset - piece->inputOff;
return piece->outputOff + addend;
}
// Returns the size reported by the string table builder that holds the
// merged contents.
uint32_t SyntheticMergedDataSegment::getSize() const {
return builder.getSize();
}
// Emits the merged string table at this chunk's offset in the output buffer
// and then applies relocations to the bytes just written.
void SyntheticMergedDataSegment::writeTo(uint8_t *buf) const {
  uint8_t *const dest = buf + outSecOff;
  builder.write(dest);
  relocate(dest);
}
void SyntheticMergedDataSegment::finalizeContents() {
// Add all string pieces to the string table builder to create section
// contents.
for (MergeInputSegment *sec : segments)
for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
if (sec->pieces[i].live)
builder.add(sec->getData(i));
// Fix the string table content. After this, the contents will never change.
builder.finalize();
// finalize() fixed tail-optimized strings, so we can now get
// offsets of strings. Get an offset for each string and save it
// to a corresponding SectionPiece for easy access.
for (MergeInputSegment *sec : segments)
for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
if (sec->pieces[i].live)
sec->pieces[i].outputOff = builder.getOffset(sec->getData(i));
}
uint64_t InputSection::getTombstoneForSection(StringRef name) {
// When a function is not live we need to update relocations referring to it.
// If they occur in DWARF debug symbols, we want to change the pc of the

View File

@ -24,6 +24,8 @@
#include "InputFiles.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/LLVM.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/Wasm.h"
namespace lld {
@ -35,7 +37,14 @@ class OutputSection;
class InputChunk {
public:
enum Kind { DataSegment, Function, SyntheticFunction, Section };
enum Kind {
DataSegment,
Merge,
MergedSegment,
Function,
SyntheticFunction,
Section
};
Kind kind() const { return sectionKind; }
@ -43,6 +52,7 @@ public:
virtual uint32_t getInputSize() const { return getSize(); };
virtual void writeTo(uint8_t *buf) const;
void relocate(uint8_t *buf) const;
ArrayRef<WasmRelocation> getRelocations() const { return relocations; }
void setRelocations(ArrayRef<WasmRelocation> rs) { relocations = rs; }
@ -97,34 +107,147 @@ protected:
// each global variable.
class InputSegment : public InputChunk {
public:
InputSegment(const WasmSegment &seg, ObjFile *f)
InputSegment(const WasmSegment *seg, ObjFile *f)
: InputChunk(f, InputChunk::DataSegment), segment(seg) {
alignment = segment.Data.Alignment;
alignment = segment->Data.Alignment;
flags = segment->Data.LinkingFlags;
}
static bool classof(const InputChunk *c) { return c->kind() == DataSegment; }
InputSegment(uint32_t alignment, uint32_t flags)
: InputChunk(nullptr, InputChunk::DataSegment), alignment(alignment),
flags(flags) {}
static bool classof(const InputChunk *c) {
return c->kind() == DataSegment || c->kind() == Merge ||
c->kind() == MergedSegment;
}
void generateRelocationCode(raw_ostream &os) const;
StringRef getName() const override { return segment.Data.Name; }
StringRef getName() const override { return segment->Data.Name; }
StringRef getDebugName() const override { return StringRef(); }
uint32_t getComdat() const override { return segment.Data.Comdat; }
uint32_t getComdat() const override { return segment->Data.Comdat; }
uint32_t getInputSectionOffset() const override {
return segment.SectionOffset;
return segment->SectionOffset;
}
// Translate an offset in the input section to an offset in the output
// section.
uint64_t getOffset(uint64_t offset) const;
uint64_t getVA(uint64_t offset = 0) const;
const OutputSegment *outputSeg = nullptr;
uint32_t outputSegmentOffset = 0;
uint32_t alignment = 0;
bool isTLS() {
return getName().startswith(".tdata") || getName().startswith(".tbss");
}
protected:
ArrayRef<uint8_t> data() const override { return segment.Data.Content; }
const OutputSegment *outputSeg = nullptr;
uint32_t outputSegmentOffset = 0;
uint32_t alignment = 0;
uint32_t flags = 0;
const WasmSegment &segment;
protected:
ArrayRef<uint8_t> data() const override { return segment->Data.Content; }
const WasmSegment *segment = nullptr;
};
class SyntheticMergedDataSegment;
// Merge segment handling copied from lld/ELF/InputSection.h. Keep in sync
// where possible.
// SegmentPiece represents a piece of splittable segment contents.
// We allocate a lot of these and binary search on them. This means that they
// have to be as compact as possible, which is why we don't store the size (can
// be found by looking at the next one).
struct SegmentPiece {
  // `off` is the piece's starting offset in the input segment. The hash is
  // shifted right by one bit so that it fits the 31-bit field next to the
  // liveness bit. A piece is always treated as live when garbage collection
  // is disabled.
  SegmentPiece(size_t off, uint32_t hash, bool live)
      : inputOff(off), live(live || !config->gcSections), hash(hash >> 1) {}

  // Offset of the piece within the input segment.
  uint32_t inputOff;
  // Whether the piece is kept in the output.
  uint32_t live : 1;
  // Hash of the piece's contents with its lowest bit dropped.
  uint32_t hash : 31;
  // Offset of the piece within the merged output; zero until it is assigned.
  uint64_t outputOff = 0;
};

static_assert(sizeof(SegmentPiece) == 16, "SegmentPiece is too big");
// This corresponds to segments marked as WASM_SEG_FLAG_STRINGS. The contents
// are split into pieces which are given positions individually inside a
// parent SyntheticMergedDataSegment.
class MergeInputSegment : public InputSegment {
public:
MergeInputSegment(const WasmSegment *seg, ObjFile *f) : InputSegment(seg, f) {
// Override the kind set by the InputSegment constructor so that classof()
// below can tell mergeable segments apart.
sectionKind = Merge;
}
static bool classof(const InputChunk *s) { return s->kind() == Merge; }
// Splits the segment contents into `pieces`. Expects `pieces` to be empty.
void splitIntoPieces();
// Translate an offset in the input segment to an offset in the parent
// SyntheticMergedDataSegment.
uint64_t getParentOffset(uint64_t offset) const;
// Splittable segments are handled as a sequence of data
// rather than a single large blob of data.
std::vector<SegmentPiece> pieces;
// Returns I'th piece's data. This function is very hot when
// string merging is enabled, so we want to inline.
LLVM_ATTRIBUTE_ALWAYS_INLINE
llvm::CachedHashStringRef getData(size_t i) const {
size_t begin = pieces[i].inputOff;
// Pieces do not store their size: a piece ends where the next one begins,
// and the last piece ends at the end of the segment data.
size_t end =
(pieces.size() - 1 == i) ? data().size() : pieces[i + 1].inputOff;
return {toStringRef(data().slice(begin, end - begin)), pieces[i].hash};
}
// Returns the SegmentPiece at a given input segment offset.
SegmentPiece *getSegmentPiece(uint64_t offset);
const SegmentPiece *getSegmentPiece(uint64_t offset) const {
return const_cast<MergeInputSegment *>(this)->getSegmentPiece(offset);
}
// The synthetic segment this segment has been added to, or null if it has
// not been added to one.
SyntheticMergedDataSegment *parent = nullptr;
private:
// Splits null-terminated string data into one piece per string.
void splitStrings(ArrayRef<uint8_t> a);
};
// SyntheticMergedDataSegment is a class that allows us to put mergeable
// segments with different attributes in a single output segment. To do that we
// put them into SyntheticMergedDataSegment synthetic input segments which are
// attached to regular output segments.
class SyntheticMergedDataSegment : public InputSegment {
public:
// `alignment` is a power-of-two exponent; the string table builder is given
// the corresponding byte alignment (1 << alignment).
SyntheticMergedDataSegment(StringRef name, uint32_t alignment, uint32_t flags)
: InputSegment(alignment, flags), name(name),
builder(llvm::StringTableBuilder::RAW, 1ULL << alignment) {
sectionKind = InputChunk::MergedSegment;
}
static bool classof(const InputChunk *c) {
return c->kind() == InputChunk::MergedSegment;
}
uint32_t getSize() const override;
StringRef getName() const override { return name; }
// Reports the comdat of the first attached segment.
// NOTE(review): assumes addMergeSegment() has been called at least once.
uint32_t getComdat() const override { return segments[0]->getComdat(); }
void writeTo(uint8_t *buf) const override;
// Attaches a mergeable input segment and records this object as its parent.
void addMergeSegment(MergeInputSegment *ms) {
ms->parent = this;
segments.push_back(ms);
}
// Builds the merged contents from the live pieces of the attached segments
// and assigns each live piece its output offset.
void finalizeContents();
protected:
// Mergeable input segments attached via addMergeSegment().
std::vector<MergeInputSegment *> segments;
StringRef name;
// Holds the merged string data.
llvm::StringTableBuilder builder;
};
// Represents a single wasm function within an input file. These are

View File

@ -425,6 +425,29 @@ void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
config->legacyFunctionTable = true;
}
// Decides whether a data segment should be handled as a mergeable string
// segment rather than as an ordinary input segment.
static bool shouldMerge(const WasmSegment &seg) {
  // As of now only string merging is supported, and only with single byte
  // alignment (2^0).
  const bool isStringSegment = seg.Data.LinkingFlags & WASM_SEG_FLAG_STRINGS;
  const bool isByteAligned = seg.Data.Alignment == 0;

  // On a regular link sections are not merged at -O0 (the default is -O1).
  // This sometimes makes the linker significantly faster, although the output
  // will be bigger.
  const bool optimizing = config->optimize != 0;

  // A mergeable segment of size 0 is useless because it has no data to merge,
  // and an empty string segment is arguably invalid since it does not end
  // with a null character. Treating it as non-mergeable avoids the mess.
  const bool hasContent = seg.Data.Content.size() != 0;

  return isStringSegment && isByteAligned && optimizing && hasContent;
}
void ObjFile::parse(bool ignoreComdats) {
// Parse a memory buffer as a wasm file.
LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
@ -506,8 +529,13 @@ void ObjFile::parse(bool ignoreComdats) {
// Populate `Segments`.
for (const WasmSegment &s : wasmObj->dataSegments()) {
auto* seg = make<InputSegment>(s, this);
InputSegment *seg;
if (shouldMerge(s)) {
seg = make<MergeInputSegment>(&s, this);
} else
seg = make<InputSegment>(&s, this);
seg->discarded = isExcludedByComdat(seg);
segments.emplace_back(seg);
}
setRelocs(segments, dataSection);

View File

@ -0,0 +1,88 @@
//===- OutputSegment.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "OutputSegment.h"
#include "InputChunks.h"
#include "lld/Common/Memory.h"
#define DEBUG_TYPE "lld"
using namespace llvm;
using namespace llvm::wasm;
namespace lld {
namespace wasm {
// Appends an input segment to this output segment: raises the output
// alignment if needed, pads the current size up to the input's alignment and
// places the input at that offset.
void OutputSegment::addInputSegment(InputSegment *inSeg) {
  const uint32_t segAlign = inSeg->alignment;
  alignment = std::max(alignment, segAlign);
  inputSegments.push_back(inSeg);

  // Alignments are stored as power-of-two exponents.
  size = llvm::alignTo(size, 1ULL << segAlign);
  LLVM_DEBUG(dbgs() << "addInputSegment: " << inSeg->getName()
                    << " oname=" << name << " size=" << inSeg->getSize()
                    << " align=" << segAlign << " at:" << size << "\n");

  inSeg->outputSeg = this;
  inSeg->outputSegmentOffset = size;
  size += inSeg->getSize();
}
// This function scans over the input segments.
//
// It removes MergeInputSegments from the input section array and adds
// new synthetic sections at the location of the first input section
// that it replaces. It then finalizes each synthetic section in order
// to compute an output offset for each piece of each input section.
void OutputSegment::finalizeInputSegments() {
LLVM_DEBUG(llvm::dbgs() << "finalizeInputSegments: " << name << "\n");
std::vector<SyntheticMergedDataSegment *> mergedSegments;
std::vector<InputSegment *> newSegments;
for (InputSegment *s : inputSegments) {
MergeInputSegment *ms = dyn_cast<MergeInputSegment>(s);
if (!ms) {
newSegments.push_back(s);
continue;
}
// A segment should not make it here unless its alive
assert(ms->live);
auto i =
llvm::find_if(mergedSegments, [=](SyntheticMergedDataSegment *seg) {
return seg->flags == ms->flags && seg->alignment == ms->alignment;
});
if (i == mergedSegments.end()) {
LLVM_DEBUG(llvm::dbgs() << "new merge section: " << name
<< " alignment=" << ms->alignment << "\n");
SyntheticMergedDataSegment *syn =
make<SyntheticMergedDataSegment>(name, ms->alignment, ms->flags);
syn->outputSeg = this;
mergedSegments.push_back(syn);
i = std::prev(mergedSegments.end());
newSegments.push_back(syn);
} else {
LLVM_DEBUG(llvm::dbgs() << "adding to merge section: " << name << "\n");
}
(*i)->addMergeSegment(ms);
}
for (auto *ms : mergedSegments)
ms->finalizeContents();
inputSegments = newSegments;
size = 0;
for (InputSegment *seg : inputSegments) {
size = llvm::alignTo(size, 1ULL << seg->alignment);
LLVM_DEBUG(llvm::dbgs() << "outputSegmentOffset set: " << seg->getName()
<< " -> " << size << "\n");
seg->outputSegmentOffset = size;
size += seg->getSize();
}
}
} // namespace wasm
} // namespace lld

View File

@ -22,21 +22,15 @@ class OutputSegment {
public:
OutputSegment(StringRef n) : name(n) {}
void addInputSegment(InputSegment *inSeg) {
uint32_t segAlign = inSeg->alignment;
alignment = std::max(alignment, segAlign);
inputSegments.push_back(inSeg);
size = llvm::alignTo(size, 1ULL << segAlign);
inSeg->outputSeg = this;
inSeg->outputSegmentOffset = size;
size += inSeg->getSize();
}
void addInputSegment(InputSegment *inSeg);
void finalizeInputSegments();
bool isTLS() const { return name == ".tdata"; }
StringRef name;
bool isBss = false;
uint32_t index = 0;
uint32_t linkingFlags = 0;
uint32_t initFlags = 0;
uint32_t sectionOffset = 0;
uint32_t alignment = 0;

View File

@ -146,6 +146,7 @@ bool Symbol::isLive() const {
void Symbol::markLive() {
assert(!isDiscarded());
referenced = true;
if (file != NULL && isDefined())
file->markLive();
if (auto *g = dyn_cast<DefinedGlobal>(this))
@ -154,9 +155,17 @@ void Symbol::markLive() {
e->event->live = true;
if (auto *t = dyn_cast<DefinedTable>(this))
t->table->live = true;
if (InputChunk *c = getChunk())
if (InputChunk *c = getChunk()) {
// Usually, a whole chunk is marked as live or dead, but in mergeable
// (splittable) sections, each piece of data has independent liveness bit.
// So we explicitly tell it which offset is in use.
if (auto *d = dyn_cast<DefinedData>(this)) {
if (auto *ms = dyn_cast<MergeInputSegment>(c)) {
ms->getSegmentPiece(d->value)->live = true;
}
}
c->live = true;
referenced = true;
}
}
uint32_t Symbol::getOutputSymbolIndex() const {

View File

@ -536,7 +536,7 @@ void LinkingSection::writeBody() {
for (const OutputSegment *s : dataSegments) {
writeStr(sub.os, s->name, "segment name");
writeUleb128(sub.os, s->alignment, "alignment");
writeUleb128(sub.os, 0, "flags");
writeUleb128(sub.os, s->linkingFlags, "flags");
}
sub.writeTo(os);
}

View File

@ -870,7 +870,6 @@ void Writer::createOutputSegments() {
s = segmentMap[name];
}
s->addInputSegment(segment);
LLVM_DEBUG(dbgs() << "added data: " << name << ": " << s->size << "\n");
}
}
@ -890,6 +889,11 @@ void Writer::createOutputSegments() {
for (size_t i = 0; i < segments.size(); ++i)
segments[i]->index = i;
// Replace the MergeInputSegments of each output segment with merged
// SyntheticMergedDataSegments.
LLVM_DEBUG(dbgs() << "-- finalize input semgments\n");
for (OutputSegment *seg : segments)
seg->finalizeInputSegments();
}
void Writer::combineOutputSegments() {
@ -910,6 +914,7 @@ void Writer::combineOutputSegments() {
new_segments.push_back(s);
} else {
if (!combined) {
LLVM_DEBUG(dbgs() << "created combined output segment: .data\n");
combined = make<OutputSegment>(".data");
combined->startVA = s->startVA;
if (config->sharedMemory)
@ -926,6 +931,8 @@ void Writer::combineOutputSegments() {
combined->addInputSegment(inSeg);
#ifndef NDEBUG
uint64_t newVA = inSeg->getVA();
LLVM_DEBUG(dbgs() << "added input segment. name=" << inSeg->getName()
<< " oldVA=" << oldVA << " newVA=" << newVA << "\n");
assert(oldVA == newVA);
#endif
}

View File

@ -154,7 +154,7 @@ struct WasmDataSegment {
ArrayRef<uint8_t> Content;
StringRef Name; // from the "segment info" section
uint32_t Alignment;
uint32_t LinkerFlags;
uint32_t LinkingFlags;
uint32_t Comdat; // from the "comdat info" section
};
@ -357,6 +357,10 @@ enum WasmSymbolType : unsigned {
WASM_SYMBOL_TYPE_TABLE = 0x5,
};
// Flags describing a data segment; stored in the segment's LinkingFlags.
enum WasmSegmentFlag : unsigned {
// The segment is a sequence of null-terminated strings (works in a similar
// fashion to SHF_STRINGS in ELF).
WASM_SEG_FLAG_STRINGS = 0x1,
};
// Kinds of event attributes.
enum WasmEventAttribute : unsigned {
WASM_EVENT_ATTRIBUTE_EXCEPTION = 0x0,

View File

@ -611,26 +611,27 @@ namespace llvm {
unsigned UniqueID = GenericSectionID);
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) {
return getWasmSection(Section, K, nullptr);
return getWasmSection(Section, K, 0, nullptr);
}
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
const char *BeginSymName) {
return getWasmSection(Section, K, "", ~0, BeginSymName);
unsigned Flags, const char *BeginSymName) {
return getWasmSection(Section, K, Flags, "", ~0, BeginSymName);
}
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
const Twine &Group, unsigned UniqueID) {
return getWasmSection(Section, K, Group, UniqueID, nullptr);
unsigned Flags, const Twine &Group,
unsigned UniqueID) {
return getWasmSection(Section, K, Flags, Group, UniqueID, nullptr);
}
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
const Twine &Group, unsigned UniqueID,
const char *BeginSymName);
unsigned Flags, const Twine &Group,
unsigned UniqueID, const char *BeginSymName);
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
const MCSymbolWasm *Group, unsigned UniqueID,
const char *BeginSymName);
unsigned Flags, const MCSymbolWasm *Group,
unsigned UniqueID, const char *BeginSymName);
MCSectionXCOFF *getXCOFFSection(
StringRef Section, SectionKind K,

View File

@ -37,14 +37,18 @@ class MCSectionWasm final : public MCSection {
// segment
uint32_t SegmentIndex = 0;
// Whether this data segment is passive
// For data sections, whether to use a passive segment
bool IsPassive = false;
// For data sections, bitfield of WasmSegmentFlag
unsigned SegmentFlags;
// The storage of Name is owned by MCContext's WasmUniquingMap.
friend class MCContext;
MCSectionWasm(StringRef Name, SectionKind K, const MCSymbolWasm *group,
unsigned UniqueID, MCSymbol *Begin)
: MCSection(SV_Wasm, Name, K, Begin), UniqueID(UniqueID), Group(group) {}
MCSectionWasm(StringRef Name, SectionKind K, unsigned SegmentFlags,
const MCSymbolWasm *Group, unsigned UniqueID, MCSymbol *Begin)
: MCSection(SV_Wasm, Name, K, Begin), UniqueID(UniqueID), Group(Group),
SegmentFlags(SegmentFlags) {}
public:
/// Decides whether a '.section' directive should be printed before the
@ -52,6 +56,7 @@ public:
bool shouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
const MCSymbolWasm *getGroup() const { return Group; }
unsigned getSegmentFlags() const { return SegmentFlags; }
void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
raw_ostream &OS,

View File

@ -21,6 +21,7 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -2005,6 +2006,17 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) {
return C;
}
// Computes the wasm segment flags implied by a section kind. Only mergeable
// C strings carry a flag at present.
static unsigned getWasmSectionFlags(SectionKind K) {
  // TODO(sbc): Add support for K.isMergeableConst()
  if (!K.isMergeableCString())
    return 0;
  return wasm::WASM_SEG_FLAG_STRINGS;
}
MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// We don't support explicit section names for functions in the wasm object
@ -2028,9 +2040,9 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
Group = C->getName();
}
MCSectionWasm* Section =
getContext().getWasmSection(Name, Kind, Group,
MCContext::GenericSectionID);
unsigned Flags = getWasmSectionFlags(Kind);
MCSectionWasm *Section = getContext().getWasmSection(
Name, Kind, Flags, Group, MCContext::GenericSectionID);
return Section;
}
@ -2062,7 +2074,8 @@ static MCSectionWasm *selectWasmSectionForGlobal(
(*NextUniqueID)++;
}
return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
unsigned Flags = getWasmSectionFlags(Kind);
return Ctx.getWasmSection(Name, Kind, Flags, Group, UniqueID);
}
MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal(

View File

@ -672,7 +672,8 @@ MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec,
}
MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind K,
const Twine &Group, unsigned UniqueID,
unsigned Flags, const Twine &Group,
unsigned UniqueID,
const char *BeginSymName) {
MCSymbolWasm *GroupSym = nullptr;
if (!Group.isTriviallyEmpty() && !Group.str().empty()) {
@ -680,10 +681,11 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind K,
GroupSym->setComdat(true);
}
return getWasmSection(Section, K, GroupSym, UniqueID, BeginSymName);
return getWasmSection(Section, K, Flags, GroupSym, UniqueID, BeginSymName);
}
MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
unsigned Flags,
const MCSymbolWasm *GroupSym,
unsigned UniqueID,
const char *BeginSymName) {
@ -704,7 +706,7 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind,
cast<MCSymbolWasm>(Begin)->setType(wasm::WASM_SYMBOL_TYPE_SECTION);
MCSectionWasm *Result = new (WasmAllocator.Allocate())
MCSectionWasm(CachedName, Kind, GroupSym, UniqueID, Begin);
MCSectionWasm(CachedName, Kind, Flags, GroupSym, UniqueID, Begin);
Entry.second = Result;
auto *F = new MCDataFragment();

View File

@ -855,9 +855,9 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
// DWP Sections
DwarfCUIndexSection =
Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata(), 0);
Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata());
DwarfTUIndexSection =
Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata(), 0);
Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata());
// Wasm use data section for LSDA.
// TODO Consider putting each function's exception table in a separate
@ -1008,8 +1008,8 @@ MCSection *MCObjectFileInfo::getDwarfComdatSection(const char *Name,
return Ctx->getELFSection(Name, ELF::SHT_PROGBITS, ELF::SHF_GROUP, 0,
utostr(Hash), /*IsComdat=*/true);
case Triple::Wasm:
return Ctx->getWasmSection(Name, SectionKind::getMetadata(), utostr(Hash),
MCContext::GenericSectionID);
return Ctx->getWasmSection(Name, SectionKind::getMetadata(), 0,
utostr(Hash), MCContext::GenericSectionID);
case Triple::MachO:
case Triple::COFF:
case Triple::GOFF:

View File

@ -90,7 +90,8 @@ public:
return false;
}
bool parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
uint32_t parseSectionFlags(StringRef FlagStr, bool &Passive, bool &Group) {
uint32_t flags = 0;
for (char C : FlagStr) {
switch (C) {
case 'p':
@ -99,12 +100,14 @@ public:
case 'G':
Group = true;
break;
case 'S':
flags |= wasm::WASM_SEG_FLAG_STRINGS;
break;
default:
return Parser->Error(getTok().getLoc(),
StringRef("Unexepcted section flag: ") + FlagStr);
return -1U;
}
}
return false;
return flags;
}
bool parseGroup(StringRef &GroupName) {
@ -128,7 +131,7 @@ public:
return false;
}
bool parseSectionDirective(StringRef, SMLoc) {
bool parseSectionDirective(StringRef, SMLoc loc) {
StringRef Name;
if (Parser->parseIdentifier(Name))
return TokError("expected identifier in directive");
@ -156,8 +159,10 @@ public:
// Update section flags if present in this .section directive
bool Passive = false;
bool Group = false;
if (parseSectionFlags(getTok().getStringContents(), Passive, Group))
return true;
uint32_t Flags =
parseSectionFlags(getTok().getStringContents(), Passive, Group);
if (Flags == -1U)
return TokError("unknown flag");
Lex();
@ -173,13 +178,19 @@ public:
// TODO: Parse UniqueID
MCSectionWasm *WS = getContext().getWasmSection(
Name, Kind.getValue(), GroupName, MCContext::GenericSectionID);
Name, Kind.getValue(), Flags, GroupName, MCContext::GenericSectionID);
if (WS->getSegmentFlags() != Flags)
Parser->Error(loc, "changed section flags for " + Name +
", expected: 0x" +
utohexstr(WS->getSegmentFlags()));
if (Passive) {
if (!WS->isWasmData())
return Parser->Error(getTok().getLoc(),
"Only data sections can be passive");
return Parser->Error(loc, "Only data sections can be passive");
WS->setPassive();
}
getStreamer().SwitchSection(WS);
return false;
}

View File

@ -64,9 +64,11 @@ void MCSectionWasm::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << ",\"";
if (IsPassive)
OS << "p";
OS << 'p';
if (Group)
OS << "G";
OS << 'G';
if (SegmentFlags & wasm::WASM_SEG_FLAG_STRINGS)
OS << 'S';
OS << '"';

View File

@ -67,7 +67,7 @@ struct WasmDataSegment {
uint32_t InitFlags;
uint64_t Offset;
uint32_t Alignment;
uint32_t LinkerFlags;
uint32_t LinkingFlags;
SmallVector<char, 4> Data;
};
@ -1133,7 +1133,7 @@ void WasmObjectWriter::writeLinkingMetaDataSection(
for (const WasmDataSegment &Segment : DataSegments) {
writeString(Segment.Name);
encodeULEB128(Segment.Alignment, W->OS);
encodeULEB128(Segment.LinkerFlags, W->OS);
encodeULEB128(Segment.LinkingFlags, W->OS);
}
endSection(SubSection);
}
@ -1440,7 +1440,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
Segment.Section = &Section;
addData(Segment.Data, Section);
Segment.Alignment = Log2_32(Section.getAlignment());
Segment.LinkerFlags = 0;
Segment.LinkingFlags = Section.getSegmentFlags();
DataSize += Segment.Data.size();
Section.setSegmentIndex(SegmentIndex);

View File

@ -462,7 +462,7 @@ Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
for (uint32_t I = 0; I < Count; I++) {
DataSegments[I].Data.Name = readString(Ctx);
DataSegments[I].Data.Alignment = readVaruint32(Ctx);
DataSegments[I].Data.LinkerFlags = readVaruint32(Ctx);
DataSegments[I].Data.LinkingFlags = readVaruint32(Ctx);
}
break;
}
@ -1431,7 +1431,7 @@ Error WasmObjectFile::parseDataSection(ReadContext &Ctx) {
// The rest of these Data fields are set later, when reading in the linking
// metadata section.
Segment.Data.Alignment = 0;
Segment.Data.LinkerFlags = 0;
Segment.Data.LinkingFlags = 0;
Segment.Data.Comdat = UINT32_MAX;
Segment.SectionOffset = Ctx.Ptr - Ctx.Start;
Ctx.Ptr += Size;

View File

@ -541,7 +541,11 @@ void ScalarBitSetTraits<WasmYAML::LimitFlags>::bitset(
}
void ScalarBitSetTraits<WasmYAML::SegmentFlags>::bitset(
IO &IO, WasmYAML::SegmentFlags &Value) {}
IO &IO, WasmYAML::SegmentFlags &Value) {
#define BCase(X) IO.bitSetCase(Value, #X, wasm::WASM_SEG_FLAG_##X)
BCase(STRINGS);
#undef BCase
}
void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
IO &IO, WasmYAML::SymbolFlags &Value) {

View File

@ -1070,7 +1070,7 @@ public:
if (Group)
WasmSym->setComdat(true);
auto *WS =
getContext().getWasmSection(SecName, SectionKind::getText(), Group,
getContext().getWasmSection(SecName, SectionKind::getText(), 0, Group,
MCContext::GenericSectionID, nullptr);
getStreamer().SwitchSection(WS);
// Also generate DWARF for this section if requested.

View File

@ -0,0 +1,7 @@
# RUN: not llvm-mc -triple=wasm32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s --implicit-check-not=error:
foo:
.section .foo,"S",@
# CHECK: {{.*}}.s:[[# @LINE+1]]:1: error: changed section flags for .foo, expected: 0x1
.section .foo,"",@

View File

@ -100,7 +100,7 @@ WasmDumper::dumpCustomSection(const WasmSection &WasmSec) {
SegmentInfo.Name = Segment.Data.Name;
SegmentInfo.Index = SegmentIndex;
SegmentInfo.Alignment = Segment.Data.Alignment;
SegmentInfo.Flags = Segment.Data.LinkerFlags;
SegmentInfo.Flags = Segment.Data.LinkingFlags;
LinkingSec->SegmentInfos.push_back(SegmentInfo);
}
if (Segment.Data.Comdat != UINT32_MAX) {