[lld][WebAssembly] Enable string tail merging in debug sections

This is a followup to https://reviews.llvm.org/D97657 which
applied string tail merging to data segments.

Fixes: https://bugs.llvm.org/show_bug.cgi?id=48828

Differential Revision: https://reviews.llvm.org/D102436
This commit is contained in:
Sam Clegg 2021-05-12 16:48:34 -07:00
parent 8dd5ef01ef
commit 45b7cf9955
15 changed files with 143 additions and 57 deletions

View File

@ -0,0 +1,4 @@
.section .debug_str,"S",@
.asciz "clang version 13.0.0"
.asciz "bar"
.asciz "foo"

View File

@ -0,0 +1,22 @@
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t.o
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/merge-string-debug2.s -o %t2.o
# RUN: wasm-ld %t.o %t2.o -o %t.wasm --no-entry
# RUN: llvm-readobj -x .debug_str %t.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O1
# RUN: wasm-ld -O0 %t.o %t2.o -o %tO0.wasm --no-entry
# RUN: llvm-readobj -x .debug_str %tO0.wasm | FileCheck %s --check-prefixes CHECK,CHECK-O0
.section .debug_str,"S",@
.asciz "clang version 13.0.0"
.asciz "foobar"
# CHECK: Hex dump of section '.debug_str':
# CHECK-O0: 0x00000000 636c616e 67207665 7273696f 6e203133 clang version 13
# CHECK-O0: 0x00000010 2e302e30 00666f6f 62617200 636c616e .0.0.foobar.clan
# CHECK-O0: 0x00000020 67207665 7273696f 6e203133 2e302e30 g version 13.0.0
# CHECK-O0: 0x00000030 00626172 00666f6f 00 .bar.foo.
# CHECK-O1: 0x00000000 666f6f62 61720066 6f6f0063 6c616e67 foobar.foo.clang
# CHECK-O1: 0x00000010 20766572 73696f6e 2031332e 302e3000 version 13.0.0.

View File

@ -796,7 +796,7 @@ static void wrapSymbols(ArrayRef<WrappedSymbol> wrapped) {
}
static void splitSections() {
// splitIntoPieces needs to be called on each MergeInputSection
// splitIntoPieces needs to be called on each MergeInputChunk
// before calling finalizeContents().
LLVM_DEBUG(llvm::dbgs() << "splitSections\n");
parallelForEach(symtab->objectFiles, [](ObjFile *file) {
@ -804,6 +804,10 @@ static void splitSections() {
if (auto *s = dyn_cast<MergeInputChunk>(seg))
s->splitIntoPieces();
}
for (InputChunk *sec : file->customSections) {
if (auto *s = dyn_cast<MergeInputChunk>(sec))
s->splitIntoPieces();
}
});
}

View File

@ -328,24 +328,24 @@ void InputFunction::writeCompressed(uint8_t *buf) const {
LLVM_DEBUG(dbgs() << " total: " << (buf + chunkSize - orig) << "\n");
}
uint64_t InputChunk::getOffset(uint64_t offset) const {
return outSecOff + offset;
}
uint64_t InputChunk::getSegmentOffset(uint64_t offset) const {
uint64_t InputChunk::getChunkOffset(uint64_t offset) const {
if (const auto *ms = dyn_cast<MergeInputChunk>(this)) {
LLVM_DEBUG(dbgs() << "getSegmentOffset(merged): " << getName() << "\n");
LLVM_DEBUG(dbgs() << "getChunkOffset(merged): " << getName() << "\n");
LLVM_DEBUG(dbgs() << "offset: " << offset << "\n");
LLVM_DEBUG(dbgs() << "parentOffset: " << ms->getParentOffset(offset)
<< "\n");
assert(ms->parent);
return ms->parent->getSegmentOffset(ms->getParentOffset(offset));
return ms->parent->getChunkOffset(ms->getParentOffset(offset));
}
return outputSegmentOffset + offset;
}
uint64_t InputChunk::getOffset(uint64_t offset) const {
return outSecOff + getChunkOffset(offset);
}
uint64_t InputChunk::getVA(uint64_t offset) const {
return (outputSeg ? outputSeg->startVA : 0) + getSegmentOffset(offset);
return (outputSeg ? outputSeg->startVA : 0) + getChunkOffset(offset);
}
// Generate code to apply relocations to the data section at runtime.

View File

@ -43,7 +43,7 @@ public:
MergedChunk,
Function,
SyntheticFunction,
Section
Section,
};
StringRef name;
@ -62,12 +62,15 @@ public:
ArrayRef<WasmRelocation> getRelocations() const { return relocations; }
void setRelocations(ArrayRef<WasmRelocation> rs) { relocations = rs; }
// Translate an offset in the input section to an offset in the output
// Translate an offset into the input chunk to an offset in the output
// section.
uint64_t getOffset(uint64_t offset) const;
// For data segments, translate an offset into the input segment into
// an offset into the output segment.
uint64_t getSegmentOffset(uint64_t offset) const;
// Translate an offset into the input chunk into an offset into the output
// chunk. For data segments (InputSegment) this will return an offset into
// the output segment. For MergeInputChunk, this will return an offset into
// the parent merged chunk. For other chunk types this is a no-op and we just
// return the offset unmodified.
uint64_t getChunkOffset(uint64_t offset) const;
uint64_t getVA(uint64_t offset = 0) const;
uint32_t getComdat() const { return comdat; }
@ -132,22 +135,19 @@ protected:
// each global variable.
class InputSegment : public InputChunk {
public:
InputSegment(const WasmSegment *seg, ObjFile *f)
: InputChunk(f, InputChunk::DataSegment, seg->Data.Name,
seg->Data.Alignment, seg->Data.LinkingFlags),
InputSegment(const WasmSegment &seg, ObjFile *f)
: InputChunk(f, InputChunk::DataSegment, seg.Data.Name,
seg.Data.Alignment, seg.Data.LinkingFlags),
segment(seg) {
rawData = segment->Data.Content;
comdat = segment->Data.Comdat;
inputSectionOffset = segment->SectionOffset;
rawData = segment.Data.Content;
comdat = segment.Data.Comdat;
inputSectionOffset = segment.SectionOffset;
}
InputSegment(StringRef name, uint32_t alignment, uint32_t flags)
: InputChunk(nullptr, InputChunk::DataSegment, name, alignment, flags) {}
static bool classof(const InputChunk *c) { return c->kind() == DataSegment; }
protected:
const WasmSegment *segment = nullptr;
const WasmSegment &segment;
};
class SyntheticMergedChunk;
@ -174,12 +174,19 @@ static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");
// This corresponds to segments marked as WASM_SEG_FLAG_STRINGS.
class MergeInputChunk : public InputChunk {
public:
MergeInputChunk(const WasmSegment *seg, ObjFile *f)
: InputChunk(f, Merge, seg->Data.Name, seg->Data.Alignment,
seg->Data.LinkingFlags) {
rawData = seg->Data.Content;
comdat = seg->Data.Comdat;
inputSectionOffset = seg->SectionOffset;
MergeInputChunk(const WasmSegment &seg, ObjFile *f)
: InputChunk(f, Merge, seg.Data.Name, seg.Data.Alignment,
seg.Data.LinkingFlags) {
rawData = seg.Data.Content;
comdat = seg.Data.Comdat;
inputSectionOffset = seg.SectionOffset;
}
MergeInputChunk(const WasmSection &s, ObjFile *f)
: InputChunk(f, Merge, s.Name, 0, llvm::wasm::WASM_SEG_FLAG_STRINGS) {
assert(s.Type == llvm::wasm::WASM_SEC_CUSTOM);
comdat = s.Comdat;
rawData = s.Content;
}
static bool classof(const InputChunk *s) { return s->kind() == Merge; }

View File

@ -360,6 +360,17 @@ void ObjFile::addLegacyIndirectFunctionTableIfNeeded(
config->legacyFunctionTable = true;
}
// Decide whether a custom section should be handled as a mergeable string
// section (i.e. wrapped in a MergeInputChunk instead of an InputSection).
//
// Returns false unconditionally when optimization is disabled
// (config->optimize == 0); otherwise returns true only for the custom
// sections known to contain nothing but null-terminated strings.
static bool shouldMerge(const WasmSection &sec) {
  if (config->optimize == 0)
    return false;
  // Sadly we don't have section attributes yet for custom sections, so we
  // currently go by the name alone.
  // TODO(sbc): Add ability for wasm sections to carry flags so we don't
  // need to use names here.
  //
  // Match the names exactly rather than by prefix: a prefix test on
  // ".debug_str" would also accept sections such as ".debug_str_offsets",
  // whose contents are offsets rather than strings and must not be split
  // into string pieces. Keep this list in sync with the sections that
  // MCObjectFileInfo::initWasmMCObjectFileInfo creates with
  // wasm::WASM_SEG_FLAG_STRINGS.
  return sec.Name == ".debug_str" || sec.Name == ".debug_str.dwo" ||
         sec.Name == ".debug_line_str";
}
static bool shouldMerge(const WasmSegment &seg) {
// As of now we only support merging strings, and only with single byte
// alignment (2^0).
@ -445,7 +456,11 @@ void ObjFile::parse(bool ignoreComdats) {
assert(!dataSection);
dataSection = &section;
} else if (section.Type == WASM_SEC_CUSTOM) {
auto *customSec = make<InputSection>(section, this);
InputChunk *customSec;
if (shouldMerge(section))
customSec = make<MergeInputChunk>(section, this);
else
customSec = make<InputSection>(section, this);
customSec->discarded = isExcludedByComdat(customSec);
customSections.emplace_back(customSec);
customSections.back()->setRelocations(section.Relocations);
@ -466,9 +481,9 @@ void ObjFile::parse(bool ignoreComdats) {
for (const WasmSegment &s : wasmObj->dataSegments()) {
InputChunk *seg;
if (shouldMerge(s)) {
seg = make<MergeInputChunk>(&s, this);
seg = make<MergeInputChunk>(s, this);
} else
seg = make<InputSegment>(&s, this);
seg = make<InputSegment>(s, this);
seg->discarded = isExcludedByComdat(seg);
segments.emplace_back(seg);
@ -585,7 +600,7 @@ Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
return symtab->addDefinedGlobal(name, flags, this, global);
}
case WASM_SYMBOL_TYPE_SECTION: {
InputSection *section = customSectionsByIndex[sym.Info.ElementIndex];
InputChunk *section = customSectionsByIndex[sym.Info.ElementIndex];
assert(sym.isBindingLocal());
// Need to return null if discarded here? data and func only do that when
// binding is not local.

View File

@ -141,8 +141,8 @@ public:
std::vector<InputGlobal *> globals;
std::vector<InputEvent *> events;
std::vector<InputTable *> tables;
std::vector<InputSection *> customSections;
llvm::DenseMap<uint32_t, InputSection *> customSectionsByIndex;
std::vector<InputChunk *> customSections;
llvm::DenseMap<uint32_t, InputChunk *> customSectionsByIndex;
Symbol *getSymbol(uint32_t index) const { return symbols[index]; }
FunctionSymbol *getFunctionSymbol(uint32_t index) const;

View File

@ -12,6 +12,7 @@
#include "OutputSegment.h"
#include "WriterUtils.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
@ -234,13 +235,42 @@ bool DataSection::isNeeded() const {
return false;
}
// Lots of duplication here with OutputSegment::finalizeInputSegments
void CustomSection::finalizeInputSections() {
SyntheticMergedChunk *mergedSection = nullptr;
std::vector<InputChunk *> newSections;
for (InputChunk *s : inputSections) {
MergeInputChunk *ms = dyn_cast<MergeInputChunk>(s);
if (!ms) {
newSections.push_back(s);
continue;
}
if (!mergedSection) {
mergedSection =
make<SyntheticMergedChunk>(name, 0, WASM_SEG_FLAG_STRINGS);
newSections.push_back(mergedSection);
}
mergedSection->addMergeChunk(ms);
}
if (!mergedSection)
return;
mergedSection->finalizeContents();
inputSections = newSections;
}
void CustomSection::finalizeContents() {
finalizeInputSections();
raw_string_ostream os(nameData);
encodeULEB128(name.size(), os);
os << name;
os.flush();
for (InputSection *section : inputSections) {
for (InputChunk *section : inputSections) {
assert(!section->discarded);
section->outputSec = this;
section->outSecOff = payloadSize;
@ -264,19 +294,19 @@ void CustomSection::writeTo(uint8_t *buf) {
buf += nameData.size();
// Write custom sections payload
for (const InputSection *section : inputSections)
for (const InputChunk *section : inputSections)
section->writeTo(buf);
}
uint32_t CustomSection::getNumRelocations() const {
uint32_t count = 0;
for (const InputSection *inputSect : inputSections)
for (const InputChunk *inputSect : inputSections)
count += inputSect->getNumRelocations();
return count;
}
void CustomSection::writeRelocations(raw_ostream &os) const {
for (const InputSection *s : inputSections)
for (const InputChunk *s : inputSections)
s->writeRelocations(os);
}

View File

@ -111,7 +111,7 @@ protected:
// separately and are instead synthesized by the linker.
class CustomSection : public OutputSection {
public:
CustomSection(std::string name, ArrayRef<InputSection *> inputSections)
CustomSection(std::string name, ArrayRef<InputChunk *> inputSections)
: OutputSection(llvm::wasm::WASM_SEC_CUSTOM, name),
inputSections(inputSections) {}
@ -128,8 +128,9 @@ public:
void finalizeContents() override;
protected:
void finalizeInputSections();
size_t payloadSize = 0;
ArrayRef<InputSection *> inputSections;
std::vector<InputChunk *> inputSections;
std::string nameData;
};

View File

@ -55,16 +55,15 @@ void OutputSegment::finalizeInputSegments() {
return seg->flags == ms->flags && seg->alignment == ms->alignment;
});
if (i == mergedSegments.end()) {
LLVM_DEBUG(llvm::dbgs() << "new merge section: " << name
LLVM_DEBUG(llvm::dbgs() << "new merge segment: " << name
<< " alignment=" << ms->alignment << "\n");
SyntheticMergedChunk *syn =
make<SyntheticMergedChunk>(name, ms->alignment, ms->flags);
auto *syn = make<SyntheticMergedChunk>(name, ms->alignment, ms->flags);
syn->outputSeg = this;
mergedSegments.push_back(syn);
i = std::prev(mergedSegments.end());
newSegments.push_back(syn);
} else {
LLVM_DEBUG(llvm::dbgs() << "adding to merge section: " << name << "\n");
LLVM_DEBUG(llvm::dbgs() << "adding to merge segment: " << name << "\n");
}
(*i)->addMergeChunk(ms);
}

View File

@ -301,7 +301,7 @@ void DefinedData::setVA(uint64_t value_) {
uint64_t DefinedData::getOutputSegmentOffset() const {
LLVM_DEBUG(dbgs() << "getOutputSegmentOffset: " << getName() << "\n");
return segment->getSegmentOffset(value);
return segment->getChunkOffset(value);
}
uint64_t DefinedData::getOutputSegmentIndex() const {

View File

@ -254,14 +254,14 @@ public:
class SectionSymbol : public Symbol {
public:
SectionSymbol(uint32_t flags, const InputSection *s, InputFile *f = nullptr)
SectionSymbol(uint32_t flags, const InputChunk *s, InputFile *f = nullptr)
: Symbol("", SectionKind, flags, f), section(s) {}
static bool classof(const Symbol *s) { return s->kind() == SectionKind; }
const OutputSectionSymbol *getOutputSectionSymbol() const;
const InputSection *section;
const InputChunk *section;
};
class DataSymbol : public Symbol {

View File

@ -100,7 +100,7 @@ private:
uint64_t fileSize = 0;
std::vector<WasmInitEntry> initFunctions;
llvm::StringMap<std::vector<InputSection *>> customSectionMapping;
llvm::StringMap<std::vector<InputChunk *>> customSectionMapping;
// Stable storage for command export wrapper function name strings.
std::list<std::string> commandExportWrapperNames;
@ -121,7 +121,7 @@ void Writer::calculateCustomSections() {
log("calculateCustomSections");
bool stripDebug = config->stripDebug || config->stripAll;
for (ObjFile *file : symtab->objectFiles) {
for (InputSection *section : file->customSections) {
for (InputChunk *section : file->customSections) {
// Exclude COMDAT sections that are not selected for inclusion
if (section->discarded)
continue;

View File

@ -610,8 +610,9 @@ namespace llvm {
getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym,
unsigned UniqueID = GenericSectionID);
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) {
return getWasmSection(Section, K, 0, nullptr);
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
unsigned Flags = 0) {
return getWasmSection(Section, K, Flags, nullptr);
}
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,

View File

@ -11,6 +11,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
@ -791,9 +792,10 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfLineSection =
Ctx->getWasmSection(".debug_line", SectionKind::getMetadata());
DwarfLineStrSection =
Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata());
DwarfStrSection =
Ctx->getWasmSection(".debug_str", SectionKind::getMetadata());
Ctx->getWasmSection(".debug_line_str", SectionKind::getMetadata(),
wasm::WASM_SEG_FLAG_STRINGS);
DwarfStrSection = Ctx->getWasmSection(
".debug_str", SectionKind::getMetadata(), wasm::WASM_SEG_FLAG_STRINGS);
DwarfLocSection =
Ctx->getWasmSection(".debug_loc", SectionKind::getMetadata());
DwarfAbbrevSection =
@ -836,7 +838,8 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
DwarfAbbrevDWOSection =
Ctx->getWasmSection(".debug_abbrev.dwo", SectionKind::getMetadata());
DwarfStrDWOSection =
Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata());
Ctx->getWasmSection(".debug_str.dwo", SectionKind::getMetadata(),
wasm::WASM_SEG_FLAG_STRINGS);
DwarfLineDWOSection =
Ctx->getWasmSection(".debug_line.dwo", SectionKind::getMetadata());
DwarfLocDWOSection =