[ELF] Sort by input order within an input section description

According to
https://sourceware.org/binutils/docs/ld/Input-Section-Basics.html#Input-Section-Basics
for `*(.a .b)`, the order should match the input order:

* for `ld 1.o 2.o`, sections from 1.o precede sections from 2.o
* within a file, `.a` and `.b` appear in the section header table order

This patch implements the behavior. The interaction with `SORT*` and --sort-section is:

Matched sections are ordered by radix sort with the keys being `(SORT*, --sort-section, input order)`,
where `SORT*` (if present) is most significant.

> Note, multiple `SORT*` within an input section description has undocumented and
> confusing behaviors in GNU ld:
> https://sourceware.org/pipermail/binutils/2020-November/114083.html
> Therefore multiple `SORT*` is not the focus for this patch but
> this patch still strives to have an explainable behavior.

As an example, we partition `SORT(a.*) b.* c.* SORT(d.*)` into
`SORT(a.*) | b.* c.* | SORT(d.*)` and perform sorting within groups. Sections
matched by patterns between two `SORT*` are sorted by input order. If
--sort-section is given, they are sorted by --sort-section, breaking ties by
input order.

This patch also allows a section to be matched by multiple patterns. Previously,
duplicated sections could occupy more space in the output and had erroneous zero bytes.

The patch is in preparation for support for
`*(SORT_BY_INIT_PRIORITY(.init_array.* .ctors.*)) *(.init_array .ctors)`,
which will allow LLD to mix .ctors*/.init_array* like GNU ld (gold's --ctors-in-init-array).
See PR44698 and PR48096.

Reviewed By: grimar, psmith

Differential Revision: https://reviews.llvm.org/D91127
This commit is contained in:
Fangrui Song 2020-11-12 08:53:11 -08:00
parent 2a9aed0e8b
commit 73d01a80ce
3 changed files with 64 additions and 17 deletions

View File

@ -409,15 +409,16 @@ static void sortSections(MutableArrayRef<InputSectionBase *> vec,
// 3. If one SORT command is given, and if it is SORT_NONE, don't sort.
// 4. If no SORT command is given, sort according to --sort-section.
static void sortInputSections(MutableArrayRef<InputSectionBase *> vec,
const SectionPattern &pat) {
if (pat.sortOuter == SortSectionPolicy::None)
SortSectionPolicy outer,
SortSectionPolicy inner) {
if (outer == SortSectionPolicy::None)
return;
if (pat.sortInner == SortSectionPolicy::Default)
if (inner == SortSectionPolicy::Default)
sortSections(vec, config->sortSection);
else
sortSections(vec, pat.sortInner);
sortSections(vec, pat.sortOuter);
sortSections(vec, inner);
sortSections(vec, outer);
}
// Compute and remember which sections the InputSectionDescription matches.
@ -425,13 +426,27 @@ std::vector<InputSectionBase *>
LinkerScript::computeInputSections(const InputSectionDescription *cmd,
ArrayRef<InputSectionBase *> sections) {
std::vector<InputSectionBase *> ret;
std::vector<size_t> indexes;
DenseSet<size_t> seen;
auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) {
llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin));
for (size_t i = begin; i != end; ++i)
ret[i] = sections[indexes[i]];
sortInputSections(
MutableArrayRef<InputSectionBase *>(ret).slice(begin, end - begin),
config->sortSection, SortSectionPolicy::None);
};
// Collects all sections that satisfy constraints of Cmd.
size_t sizeAfterPrevSort = 0;
for (const SectionPattern &pat : cmd->sectionPatterns) {
size_t sizeBefore = ret.size();
size_t sizeBeforeCurrPat = ret.size();
for (InputSectionBase *sec : sections) {
if (!sec->isLive() || sec->parent)
for (size_t i = 0, e = sections.size(); i != e; ++i) {
// Skip if the section is dead or has been matched by a previous input
// section description or a previous pattern.
InputSectionBase *sec = sections[i];
if (!sec->isLive() || sec->parent || seen.contains(i))
continue;
// For -emit-relocs we have to ignore entries like
@ -453,11 +468,31 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd,
continue;
ret.push_back(sec);
indexes.push_back(i);
seen.insert(i);
}
if (pat.sortOuter == SortSectionPolicy::Default)
continue;
// Matched sections are ordered by radix sort with the keys being (SORT*,
// --sort-section, input order), where SORT* (if present) is most
// significant.
//
// Matched sections between the previous SORT* and this SORT* are sorted by
// (--sort-alignment, input order).
sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat);
// Matched sections by this SORT* pattern are sorted using all 3 keys.
// ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we
// just sort by sortOuter and sortInner.
sortInputSections(
MutableArrayRef<InputSectionBase *>(ret).slice(sizeBefore), pat);
MutableArrayRef<InputSectionBase *>(ret).slice(sizeBeforeCurrPat),
pat.sortOuter, pat.sortInner);
sizeAfterPrevSort = ret.size();
}
// Matched sections after the last SORT* are sorted by (--sort-alignment,
// input order).
sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size());
return ret;
}

View File

@ -8,10 +8,12 @@
# RUN: ld.lld -script %t1.script %tfile1.o %tfile2.o %tfile3.o -o %t1.o
# RUN: llvm-objdump -s %t1.o | FileCheck %s
## Sections from %tfile1 precede sections from %tfile2 and %tfile3.
## In each file, the sections are added in the original order.
# CHECK: Contents of section .foo:
# CHECK-NEXT: 01000000 00000000 04000000 00000000
# CHECK-NEXT: 07000000 00000000 05000000 00000000
# CHECK-NEXT: 08000000 00000000 03000000 00000000
# CHECK-NEXT: 03000000 00000000 01000000 00000000
# CHECK-NEXT: 04000000 00000000 05000000 00000000
# CHECK-NEXT: 07000000 00000000 08000000 00000000
# CHECK-NEXT: 09000000 00000000
# CHECK-NEXT: Contents of section .foo.2:
# CHECK-NEXT: 02000000 00000000
@ -27,11 +29,12 @@
# RUN: not ld.lld -script %t3.script %tfile1.o %tfile2.o %tfile3.o -o /dev/null 2>&1 | \
# RUN: FileCheck %s --check-prefix=ERR
.section .foo.1,"a"
.quad 1
.section .foo.2,"a"
.quad 2
## %tfile1.o(.foo.3) precedes %tfile1.o(.foo.1) in the output section.
.section .foo.3,"a"
.quad 3
.section .foo.1,"a"
.quad 1

View File

@ -5,9 +5,11 @@
# RUN: ld.lld -o %t1 --script %t1.script %tfile1.o
# RUN: llvm-readelf -x .abc %t1 | FileCheck %s
## FIXME Some input sections are duplicated in .abc and their second occurrences are zeros.
## Sections matched by patterns between two SORT are sorted separately by input order.
## Note, GNU ld has a strange behavior with more than one SORT* https://sourceware.org/pipermail/binutils/2020-November/114083.html
## In the absence of SORT, our multi-pattern behavior matches GNU ld.
# CHECK: Hex dump of section '.abc'
# CHECK-NEXT: 0x00000000 01020306 05040000 00070908 0b0c0a
# CHECK-NEXT: 0x00000000 01020306 05040708 090b0c0a
# RUN: echo "SECTIONS { \
# RUN: .abc : { *(SORT(.foo.* EXCLUDE_FILE (*file1.o) .bar.*) .a* SORT(.bar.*) .b*) } \
@ -15,6 +17,13 @@
# RUN: ld.lld -o %t2 --script %t2.script %tfile1.o
# RUN: llvm-readelf -x .abc %t2 | FileCheck %s
## Non-SORT patterns are sorted by --sort-section, breaking tie by input order.
# RUN: ld.lld -o %t4 --script %t1.script --sort-section=name %tfile1.o
# RUN: llvm-readelf -x .abc %t4 | FileCheck %s --check-prefix=CHECK2
# CHECK2: Hex dump of section '.abc'
# CHECK2-NEXT: 0x00000000 01020304 05060708 090a0b0c
.text
.globl _start
_start: