[ELF] Add OVERWRITE_SECTIONS command

This implements https://sourceware.org/bugzilla/show_bug.cgi?id=26404

An `OVERWRITE_SECTIONS` command is a `SECTIONS` variant which contains several
output section descriptions. The output sections do not have to specify an order.
Similar to `INSERT [BEFORE|AFTER]`, `LinkerScript::hasSectionsCommand` is not
set, so the built-in rules (see `docs/ELF/linker_script.rst`) still apply.
`OVERWRITE_SECTIONS` can be more convenient than `INSERT` because it does not
need an anchor section.

The initial syntax is intentionally narrow to facilitate backward compatible
extensions in the future. Symbol assignments cannot be used.

This feature is versatile. To list a few usages:

* Use `section : { KEEP(...) }` to retain input sections under GC
* Define encapsulation symbols (start/end) for an output section
* Use `section : ALIGN(...) : { ... }` to overalign an output section (similar to ld64 `-sectalign`)

When an output section is specified by both `OVERWRITE_SECTIONS` and
`INSERT`, `INSERT` is processed after overwrite sections. To make this work,
this patch changes `InsertCommand` to use name based matching instead of pointer
based matching. (This may cause a difference when `INSERT` moves one output
section more than once. Such duplicate commands should not be used in practice;
it seems that in GNU ld the output sections may just disappear.)

A linker script can be used without -T/--script. The traditional `SECTIONS`
commands are concatenated, so a wrong rule can be more noticeable from the
section order. This feature, if misused, can be less noticeable, just like
`INSERT`.

Differential Revision: https://reviews.llvm.org/D103303
This commit is contained in:
Fangrui Song 2021-06-13 12:41:11 -07:00
parent 60e32a1f34
commit 899fdf548e
8 changed files with 320 additions and 59 deletions

View File

@ -254,9 +254,13 @@ void LinkerScript::processInsertCommands() {
for (const InsertCommand &cmd : insertCommands) {
// If cmd.os is empty, it may have been discarded by
// adjustSectionsBeforeSorting(). We do not handle such output sections.
auto from = llvm::find(sectionCommands, cmd.os);
auto from = llvm::find_if(sectionCommands, [&](BaseCommand *base) {
return isa<OutputSection>(base) &&
cast<OutputSection>(base)->name == cmd.name;
});
if (from == sectionCommands.end())
continue;
OutputSection *osec = cast<OutputSection>(*from);
sectionCommands.erase(from);
auto insertPos = llvm::find_if(sectionCommands, [&cmd](BaseCommand *base) {
@ -264,12 +268,12 @@ void LinkerScript::processInsertCommands() {
return to != nullptr && to->name == cmd.where;
});
if (insertPos == sectionCommands.end()) {
error("unable to insert " + cmd.os->name +
error("unable to insert " + osec->name +
(cmd.isAfter ? " after " : " before ") + cmd.where);
} else {
if (cmd.isAfter)
++insertPos;
sectionCommands.insert(insertPos, cmd.os);
sectionCommands.insert(insertPos, osec);
}
}
}
@ -547,52 +551,73 @@ LinkerScript::createInputSectionList(OutputSection &outCmd) {
// Create output sections described by SECTIONS commands.
void LinkerScript::processSectionCommands() {
size_t i = 0;
for (BaseCommand *base : sectionCommands) {
if (auto *sec = dyn_cast<OutputSection>(base)) {
std::vector<InputSectionBase *> v = createInputSectionList(*sec);
auto process = [this](OutputSection *osec) {
std::vector<InputSectionBase *> v = createInputSectionList(*osec);
// The output section name `/DISCARD/' is special.
// Any input section assigned to it is discarded.
if (sec->name == "/DISCARD/") {
for (InputSectionBase *s : v)
discard(s);
discardSynthetic(*sec);
sec->sectionCommands.clear();
continue;
}
// This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
// ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
// sections satisfy a given constraint. If not, a directive is handled
// as if it wasn't present from the beginning.
//
// Because we'll iterate over SectionCommands many more times, the easy
// way to "make it as if it wasn't present" is to make it empty.
if (!matchConstraints(v, sec->constraint)) {
for (InputSectionBase *s : v)
s->parent = nullptr;
sec->sectionCommands.clear();
continue;
}
// Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
// is given, input sections are aligned to that value, whether the
// given value is larger or smaller than the original section alignment.
if (sec->subalignExpr) {
uint32_t subalign = sec->subalignExpr().getValue();
for (InputSectionBase *s : v)
s->alignment = subalign;
}
// Set the partition field the same way OutputSection::recordSection()
// does. Partitions cannot be used with the SECTIONS command, so this is
// always 1.
sec->partition = 1;
sec->sectionIndex = i++;
// The output section name `/DISCARD/' is special.
// Any input section assigned to it is discarded.
if (osec->name == "/DISCARD/") {
for (InputSectionBase *s : v)
discard(s);
discardSynthetic(*osec);
osec->sectionCommands.clear();
return false;
}
}
// This is for ONLY_IF_RO and ONLY_IF_RW. An output section directive
// ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input
// sections satisfy a given constraint. If not, a directive is handled
// as if it wasn't present from the beginning.
//
// Because we'll iterate over SectionCommands many more times, the easy
// way to "make it as if it wasn't present" is to make it empty.
if (!matchConstraints(v, osec->constraint)) {
for (InputSectionBase *s : v)
s->parent = nullptr;
osec->sectionCommands.clear();
return false;
}
// Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign
// is given, input sections are aligned to that value, whether the
// given value is larger or smaller than the original section alignment.
if (osec->subalignExpr) {
uint32_t subalign = osec->subalignExpr().getValue();
for (InputSectionBase *s : v)
s->alignment = subalign;
}
// Set the partition field the same way OutputSection::recordSection()
// does. Partitions cannot be used with the SECTIONS command, so this is
// always 1.
osec->partition = 1;
return true;
};
// Process OVERWRITE_SECTIONS first so that it can overwrite the main script
// or orphans.
DenseMap<StringRef, OutputSection *> map;
size_t i = 0;
for (OutputSection *osec : overwriteSections)
if (process(osec) && !map.try_emplace(osec->name, osec).second)
warn("OVERWRITE_SECTIONS specifies duplicate " + osec->name);
for (BaseCommand *&base : sectionCommands)
if (auto *osec = dyn_cast<OutputSection>(base)) {
if (OutputSection *overwrite = map.lookup(osec->name)) {
log(overwrite->location + " overwrites " + osec->name);
overwrite->sectionIndex = i++;
base = overwrite;
} else if (process(osec)) {
osec->sectionIndex = i++;
}
}
// If an OVERWRITE_SECTIONS specified output section is not in
// sectionCommands, append it to the end. The section will be inserted by
// orphan placement.
for (OutputSection *osec : overwriteSections)
if (osec->partition == 1 && osec->sectionIndex == UINT32_MAX)
sectionCommands.push_back(osec);
}
void LinkerScript::processSymbolAssignments() {

View File

@ -227,7 +227,7 @@ struct ByteCommand : BaseCommand {
};
struct InsertCommand {
OutputSection *os;
StringRef name;
bool isAfter;
StringRef where;
};
@ -343,6 +343,9 @@ public:
// to be reordered.
std::vector<InsertCommand> insertCommands;
// OutputSections specified by OVERWRITE_SECTIONS.
std::vector<OutputSection *> overwriteSections;
// Sections that will be warned/errored by --orphan-handling.
std::vector<const InputSectionBase *> orphanSections;
};

View File

@ -77,6 +77,7 @@ private:
void readOutput();
void readOutputArch();
void readOutputFormat();
void readOverwriteSections();
void readPhdrs();
void readRegionAlias();
void readSearchDir();
@ -251,6 +252,8 @@ void ScriptParser::readLinkerScript() {
readOutputArch();
} else if (tok == "OUTPUT_FORMAT") {
readOutputFormat();
} else if (tok == "OVERWRITE_SECTIONS") {
readOverwriteSections();
} else if (tok == "PHDRS") {
readPhdrs();
} else if (tok == "REGION_ALIAS") {
@ -553,6 +556,12 @@ std::vector<BaseCommand *> ScriptParser::readOverlay() {
return v;
}
void ScriptParser::readOverwriteSections() {
expect("{");
while (!errorCount() && !consume("}"))
script->overwriteSections.push_back(readOutputSectionDescription(next()));
}
void ScriptParser::readSections() {
expect("{");
std::vector<BaseCommand *> v;
@ -588,7 +597,7 @@ void ScriptParser::readSections() {
StringRef where = next();
for (BaseCommand *cmd : v)
if (auto *os = dyn_cast<OutputSection>(cmd))
script->insertCommands.push_back({os, isAfter, where});
script->insertCommands.push_back({os->name, isAfter, where});
}
void ScriptParser::readTarget() {

View File

@ -36,6 +36,39 @@ reset ``st_type`` to ``STT_NOTYPE``.
The ``st_size`` field is set to 0.
SECTIONS command
~~~~~~~~~~~~~~~~
A ``SECTIONS`` command looks like:
::
SECTIONS {
section-command
section-command
...
} [INSERT [AFTER|BEFORE] anchor_section;]
Each section-command can be a symbol assignment, an output section description,
or an overlay description.
When the ``INSERT`` keyword is present, the ``SECTIONS`` command describes some
output sections which should be inserted after or before the specified anchor
section. The insertion occurs after input sections have been mapped to output
sections but before orphan sections have been processed.
In the case where no linker script has been provided or every ``SECTIONS``
command is followed by ``INSERT``, LLD applies built-in rules which are similar
to GNU ld's internal linker scripts.
- Align the first section in a ``PT_LOAD`` segment according to ``-z noseparate-code``,
``-z separate-code``, or ``-z separate-loadable-segments``
- Define ``__bss_start``, ``end``, ``_end``, ``etext``, ``_etext``, ``edata``, ``_edata``
- Sort ``.ctors.*``/``.dtors.*``/``.init_array.*``/``.fini_array.*`` and PowerPC64 specific ``.toc``
- Place input ``.text.*`` into output ``.text``, and handle certain variants
(``.text.hot.``, ``.text.unknown.``, ``.text.unlikely.``, etc.) in the presence of
``-z keep-text-section-prefix``.
Output section description
~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -94,3 +127,30 @@ If neither ``AT(lma)`` nor ``AT>lma_region`` is specified:
section have the same memory regions, the difference between the LMA and the
VMA is computed to be the same as the previous difference.
- Otherwise, the LMA is set to the VMA.
Overwrite sections
~~~~~~~~~~~~~~~~~~
An ``OVERWRITE_SECTIONS`` command looks like:
::
OVERWRITE_SECTIONS {
output-section-description
output-section-description
...
}
Unlike a ``SECTIONS`` command, ``OVERWRITE_SECTIONS`` does not specify a
section order or suppress the built-in rules.
If a described output section also appears in a ``SECTIONS``
command, the ``OVERWRITE_SECTIONS`` command wins; otherwise, the output section
will be added somewhere following the usual orphan section placement rules.
If a described output section also appears in an ``INSERT
[AFTER|BEFORE]`` command, its description will be taken from the
``OVERWRITE_SECTIONS`` command while the insert command still applies
(possibly after orphan section placement). It is recommended to leave the
braces empty (i.e. ``section : {}``) for the insert command, because its
description will be ignored anyway.

View File

@ -28,6 +28,8 @@ ELF Improvements
(`D102461 <https://reviews.llvm.org/D102461>`_)
* ``-Bno-symbolic`` has been added.
(`D102461 <https://reviews.llvm.org/D102461>`_)
* A new linker script command ``OVERWRITE_SECTIONS`` has been added.
(`D103303 <https://reviews.llvm.org/D103303>`_)
Breaking changes
----------------

View File

@ -1,7 +1,6 @@
# REQUIRES: x86
## Test that we can handle cases where an output section is specified by multiple
## INSERT commands. Each output section description creates a new instance.
## A redundant description matches no input sections and thus is a no-op.
## Document the behavior when an output section is specified by multiple
## INSERT commands. It is discouraged in the real world.
# RUN: llvm-mc -filetype=obj -triple=x86_64 %p/Inputs/insert-after.s -o %t.o
# RUN: ld.lld -T %s %t.o -o %t
@ -9,16 +8,14 @@
# CHECK: Name Type Address Off
# CHECK-NEXT: NULL 0000000000000000 000000
# CHECK-NEXT: .text PROGBITS 00000000002011c8 0001c8
# CHECK-NEXT: .foo.data PROGBITS 00000000002021d0 0001d0
# CHECK-NEXT: .foo.text PROGBITS 00000000002031d8 0001d8
# CHECK-NEXT: .text PROGBITS 0000000000201158 000158
# CHECK-NEXT: .foo.text PROGBITS 0000000000201160 000160
# CHECK-NEXT: .foo.data PROGBITS 0000000000202168 000168
# CHECK: Type
# CHECK-NEXT: PHDR {{.*}} R
# CHECK-NEXT: LOAD {{.*}} R
# CHECK-NEXT: LOAD {{.*}} R E
# CHECK-NEXT: LOAD {{.*}} RW
# CHECK-NEXT: LOAD {{.*}} R E
# CHECK-NEXT: LOAD {{.*}} RW
# CHECK-NEXT: GNU_STACK {{.*}} RW
## First, move .foo.data after .foo.text
@ -27,6 +24,5 @@ SECTIONS { .foo.data : { *(.foo.data) } } INSERT AFTER .foo.text;
## Next, move .foo.text after .foo.data
SECTIONS { .foo.text : { *(.foo.text) } } INSERT AFTER .foo.data;
## No-op. The .foo.data output section is a different instance and matches no
## input sections.
## Then, move .foo.data after .foo.text again.
SECTIONS { .foo.data : { *(.foo.data) } } INSERT AFTER .foo.text;

View File

@ -0,0 +1,30 @@
# REQUIRES: x86
# RUN: split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o
## /DISCARD/ specified by OVERWRITE_SECTIONS can discard sections as well.
# RUN: ld.lld %t/overwrite.lds -T %t/main.lds %t/a.o -o %t1 2>&1 | count 0
# RUN: llvm-readelf -S -l %t1 | FileCheck %s
# RUN: ld.lld %t/overwrite.lds %t/overwrite.lds -T %t/main.lds %t/a.o -o %t1 2>&1 | count 0
# RUN: llvm-readelf -S -l %t1 | FileCheck %s
# CHECK: Name
# CHECK-NOT: .data
#--- a.s
.globl _start
_start:
.section .data.1,"aw"; .byte 1
.section .data.2,"aw"; .byte 2
#--- main.lds
SECTIONS {
/DISCARD/ : { *(.data.1) }
}
#--- overwrite.lds
OVERWRITE_SECTIONS {
/DISCARD/ : { *(.data.2) }
}

View File

@ -0,0 +1,136 @@
# REQUIRES: x86
# RUN: rm -rf %t && split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64 %t/a.s -o %t/a.o
## There is no main linker script. OVERWRITE_SECTIONS defines output section
## descriptions and follows the usual orphan placement rules.
# RUN: ld.lld %t/overwrite1.lds %t/a.o -o %t1
# RUN: llvm-readelf -S %t1 | FileCheck %s --check-prefix=CHECK1
# CHECK1: Name Type Address Off Size ES Flg Lk Inf Al
# CHECK1-NEXT: NULL [[#%x,]] [[#%x,]] 000000
# CHECK1-NEXT: .text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 4
# CHECK1-NEXT: .foo.text PROGBITS [[#%x,]] [[#%x,]] 000002 00 WAX 0 0 8
# CHECK1-NEXT: .data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK1-NEXT: .comment PROGBITS {{.*}}
# RUN: ld.lld %t/overwrite1.lds %t/overwrite1.lds %t/a.o -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN1
# WARN1: warning: OVERWRITE_SECTIONS specifies duplicate .foo.text
## The output section description order (.foo.data .foo.text) does not affect
## the output order.
# RUN: ld.lld %t/overwrite2.lds %t/a.o -o %t2
# RUN: llvm-readelf -S -s %t2 | FileCheck %s --check-prefix=CHECK2
# CHECK2: [Nr] Name Type Address Off Size ES Flg Lk Inf Al
# CHECK2-NEXT: [ 0] NULL [[#%x,]] [[#%x,]] 000000
# CHECK2-NEXT: [ 1] .text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 4
# CHECK2-NEXT: [ 2] .foo.text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 1
# CHECK2-NEXT: [ 3] .data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK2-NEXT: [ 4] .foo.data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK2-NEXT: [ 5] .comment PROGBITS {{.*}}
# CHECK2: Num: Value Size Type Bind Vis Ndx Name
# CHECK2: [[#]]: [[#%x,ADDR:]] 0 NOTYPE GLOBAL DEFAULT 4 FOO_DATA_START
# CHECK2-NEXT: [[#]]: {{0*}}[[#ADDR+1]] 0 NOTYPE GLOBAL DEFAULT 4 FOO_DATA_END
# CHECK2-NEXT: [[#]]: [[#%x,ADDR:]] 0 NOTYPE GLOBAL DEFAULT 2 FOO_TEXT_START
# CHECK2-NEXT: [[#]]: {{0*}}[[#ADDR+1]] 0 NOTYPE GLOBAL DEFAULT 2 FOO_TEXT_END
## OVERWRITE_SECTIONS is processed before the main script. The main script
## specifies the output order. The alignment of .foo.text is specified by
## OVERWRITE_SECTIONS instead of the main script.
# RUN: ld.lld -T %t/main.lds %t/overwrite1.lds %t/a.o -o %t3 2>&1 | count 0
# RUN: llvm-readelf -S %t3 | FileCheck %s --check-prefix=CHECK3
# RUN: ld.lld %t/overwrite1.lds -T %t/main.lds %t/a.o -o %t3 2>&1 | count 0
# RUN: llvm-readelf -S %t3 | FileCheck %s --check-prefix=CHECK3
# CHECK3: Name Type Address Off Size ES Flg Lk Inf Al
# CHECK3-NEXT: NULL [[#%x,]] [[#%x,]] 000000
# CHECK3-NEXT: .data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK3-NEXT: .text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 4
# CHECK3-NEXT: .foo.text PROGBITS [[#%x,]] [[#%x,]] 000002 00 WAX 0 0 8
# CHECK3-NEXT: .comment PROGBITS {{.*}}
# RUN: ld.lld -T %t/main.lds %t/overwrite1.lds %t/a.o -o %t3 --verbose 2>&1 | FileCheck %s --check-prefix=VERBOSE3
# VERBOSE3: {{.*}}overwrite1.lds:[[#]] overwrites .foo.text
## If INSERT commands are specified, INSERT commands are processed after overwrite sections.
# RUN: ld.lld %t/insert.lds %t/overwrite2.lds %t/a.o -o %t4
# RUN: llvm-readelf -S -s %t4 | FileCheck %s --check-prefix=CHECK4
# CHECK4: [Nr] Name Type Address Off Size ES Flg Lk Inf Al
# CHECK4-NEXT: [ 0] NULL [[#%x,]] [[#%x,]] 000000
# CHECK4-NEXT: [ 1] .text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 4
# CHECK4-NEXT: [ 2] .data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK4-NEXT: [ 3] .foo.data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK4-NEXT: [ 4] .foo.text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 1
# CHECK4-NEXT: [ 5] .comment PROGBITS {{.*}}
# CHECK4: Num: Value Size Type Bind Vis Ndx Name
# CHECK4: [[#]]: [[#%x,ADDR:]] 0 NOTYPE GLOBAL DEFAULT 4 FOO_TEXT_START
# CHECK4-NEXT: [[#]]: {{0*}}[[#ADDR+1]] 0 NOTYPE GLOBAL DEFAULT 4 FOO_TEXT_END
# CHECK4-NEXT: [[#]]: [[#%x,ADDR:]] 0 NOTYPE GLOBAL DEFAULT 3 FOO_DATA_START
# CHECK4-NEXT: [[#]]: {{0*}}[[#ADDR+1]] 0 NOTYPE GLOBAL DEFAULT 3 FOO_DATA_END
## OVERWRITE_SECTIONS describes a section used as an anchor of an insert command.
## Place .foo.data as an orphan, then insert .foo.text after .foo.data.
# RUN: ld.lld %t/insert.lds %t/overwrite3.lds %t/a.o -o %t5
# RUN: llvm-readelf -S -s %t4 | FileCheck %s --check-prefix=CHECK5
# CHECK5: [Nr] Name Type Address Off Size ES Flg Lk Inf Al
# CHECK5-NEXT: [ 0] NULL [[#%x,]] [[#%x,]] 000000
# CHECK5-NEXT: [ 1] .text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 4
# CHECK5-NEXT: [ 2] .data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK5-NEXT: [ 3] .foo.data PROGBITS [[#%x,]] [[#%x,]] 000001 00 WA 0 0 1
# CHECK5-NEXT: [ 4] .foo.text PROGBITS [[#%x,]] [[#%x,]] 000001 00 AX 0 0 1
# CHECK5-NEXT: [ 5] .comment PROGBITS {{.*}}
#--- a.s
.globl _start
_start:
.section .foo.text,"ax"; .byte 1
.section .foo.data,"aw"; .byte 2
.section .text.1,"ax"; .byte 3
.section .data.1,"aw"; .byte 4
#--- main.lds
SECTIONS {
.data : { *(.data*) }
.foo.data : { *(.foo.data) }
.text : { *(.text*) }
.foo.text : ALIGN(16) { *(.foo.text) }
}
#--- overwrite1.lds
OVERWRITE_SECTIONS {
.foo.text : ALIGN(8) { *(.foo.data .foo.text) }
}
#--- overwrite2.lds
OVERWRITE_SECTIONS {
.foo.data : { FOO_DATA_START = .; *(.foo.data) FOO_DATA_END = .; }
.foo.text : { FOO_TEXT_START = .; *(.foo.text) FOO_TEXT_END = .; }
}
#--- overwrite3.lds
OVERWRITE_SECTIONS {
.foo.data : {}
}
#--- insert.lds
SECTIONS { .foo.text : {} } INSERT AFTER .foo.data;
#--- err1.lds
## TODO Fix the diagnostic 'malformed number'
# RUN: not ld.lld %t/err1.lds %t/a.o -o /dev/null 2>&1 | FileCheck %s -DFILE=%t/err1.lds --check-prefix=ERR1
# ERR1: error: [[FILE]]:[[#@LINE+2]]: malformed number: =
OVERWRITE_SECTIONS {
A = 1;
}