diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index 6405fb38c6d6..413871b60cf7 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -50,6 +50,7 @@ public: template void writeHeaderTo(typename ELFT::Shdr *SHdr); unsigned SectionIndex; + unsigned SortRank; uint32_t getPhdrFlags() const; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index c8f5378aa829..4020fb1bdfec 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -546,32 +546,6 @@ template void Writer::addSectionSymbols() { } } -// PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections that -// we would like to make sure appear is a specific order to maximize their -// coverage by a single signed 16-bit offset from the TOC base pointer. -// Conversely, the special .tocbss section should be first among all SHT_NOBITS -// sections. This will put it next to the loaded special PPC64 sections (and, -// thus, within reach of the TOC base pointer). -static int getPPC64SectionRank(StringRef SectionName) { - return StringSwitch(SectionName) - .Case(".tocbss", 0) - .Case(".branch_lt", 2) - .Case(".toc", 3) - .Case(".toc1", 4) - .Case(".opd", 5) - .Default(1); -} - -// All sections with SHF_MIPS_GPREL flag should be grouped together -// because data in these sections is addressable with a gp relative address. -static int getMipsSectionRank(const OutputSection *S) { - if ((S->Flags & SHF_MIPS_GPREL) == 0) - return 0; - if (S->Name == ".got") - return 1; - return 2; -} - // Today's loaders have a feature to make segments read-only after // processing dynamic relocations to enhance security. PT_GNU_RELRO // is defined for that. @@ -645,99 +619,145 @@ bool elf::isRelroSection(const OutputSection *Sec) { S == ".eh_frame" || S == ".openbsd.randomdata"; } -static bool compareSectionsNonScript(const OutputSection *A, - const OutputSection *B) { - // Put .interp first because some loaders want to see that section - // on the first page of the executable file when loaded into memory. - bool AIsInterp = A->Name == ".interp"; - bool BIsInterp = B->Name == ".interp"; - if (AIsInterp != BIsInterp) - return AIsInterp; +// We compute a rank for each section. The rank indicates where the +// section should be placed in the file. Instead of using simple +// numbers (0,1,2...), we use a series of flags. One for each decision +// point when placing the section. +// Using flags has two key properties: +// * It is easy to check if a give branch was taken. +// * It is easy two see how similar two ranks are (see getRankProximity). +enum RankFlags { + RF_NOT_ADDR_SET = 1 << 16, + RF_NOT_INTERP = 1 << 15, + RF_NOT_ALLOC = 1 << 14, + RF_WRITE = 1 << 13, + RF_EXEC = 1 << 12, + RF_NON_TLS_BSS = 1 << 11, + RF_NON_TLS_BSS_RO = 1 << 10, + RF_NOT_TLS = 1 << 9, + RF_BSS = 1 << 8, + RF_PPC_NOT_TOCBSS = 1 << 7, + RF_PPC_OPD = 1 << 6, + RF_PPC_TOCL = 1 << 5, + RF_PPC_TOC = 1 << 4, + RF_PPC_BRANCH_LT = 1 << 3, + RF_MIPS_GPREL = 1 << 2, + RF_MIPS_NOT_GOT = 1 << 1 +}; - // Allocatable sections go first to reduce the total PT_LOAD size and - // so debug info doesn't change addresses in actual code. - bool AIsAlloc = A->Flags & SHF_ALLOC; - bool BIsAlloc = B->Flags & SHF_ALLOC; - if (AIsAlloc != BIsAlloc) - return AIsAlloc; - - // We don't have any special requirements for the relative order of two non - // allocatable sections. - if (!AIsAlloc) - return false; +static unsigned getSectionRank(const OutputSection *Sec) { + unsigned Rank = 0; // We want to put section specified by -T option first, so we // can start assigning VA starting from them later. - auto AAddrSetI = Config->SectionStartMap.find(A->Name); - auto BAddrSetI = Config->SectionStartMap.find(B->Name); - bool AHasAddrSet = AAddrSetI != Config->SectionStartMap.end(); - bool BHasAddrSet = BAddrSetI != Config->SectionStartMap.end(); - if (AHasAddrSet != BHasAddrSet) - return AHasAddrSet; - if (AHasAddrSet) - return AAddrSetI->second < BAddrSetI->second; + if (Config->SectionStartMap.count(Sec->Name)) + return Rank; + Rank |= RF_NOT_ADDR_SET; + + // Put .interp first because some loaders want to see that section + // on the first page of the executable file when loaded into memory. + if (Sec->Name == ".interp") + return Rank; + Rank |= RF_NOT_INTERP; + + // Allocatable sections go first to reduce the total PT_LOAD size and + // so debug info doesn't change addresses in actual code. + if (!(Sec->Flags & SHF_ALLOC)) + return Rank | RF_NOT_ALLOC; // We want the read only sections first so that they go in the PT_LOAD // covering the program headers at the start of the file. - bool AIsWritable = A->Flags & SHF_WRITE; - bool BIsWritable = B->Flags & SHF_WRITE; - if (AIsWritable != BIsWritable) - return BIsWritable; + if (Sec->Flags & SHF_WRITE) + Rank |= RF_WRITE; - if (!Config->SingleRoRx) { + if (Sec->Flags & SHF_EXECINSTR) { // For a corresponding reason, put non exec sections first (the program // header PT_LOAD is not executable). // We only do that if we are not using linker scripts, since with linker // scripts ro and rx sections are in the same PT_LOAD, so their relative // order is not important. The same applies for -no-rosegment. - bool AIsExec = A->Flags & SHF_EXECINSTR; - bool BIsExec = B->Flags & SHF_EXECINSTR; - if (AIsExec != BIsExec) - return BIsExec; + if ((Rank & RF_WRITE) || !Config->SingleRoRx) + Rank |= RF_EXEC; } // If we got here we know that both A and B are in the same PT_LOAD. - bool AIsTls = A->Flags & SHF_TLS; - bool BIsTls = B->Flags & SHF_TLS; - bool AIsNoBits = A->Type == SHT_NOBITS; - bool BIsNoBits = B->Type == SHT_NOBITS; + bool IsTls = Sec->Flags & SHF_TLS; + bool IsNoBits = Sec->Type == SHT_NOBITS; // The first requirement we have is to put (non-TLS) nobits sections last. The // reason is that the only thing the dynamic linker will see about them is a // p_memsz that is larger than p_filesz. Seeing that it zeros the end of the // PT_LOAD, so that has to correspond to the nobits sections. - bool AIsNonTlsNoBits = AIsNoBits && !AIsTls; - bool BIsNonTlsNoBits = BIsNoBits && !BIsTls; - if (AIsNonTlsNoBits != BIsNonTlsNoBits) - return BIsNonTlsNoBits; + bool IsNonTlsNoBits = IsNoBits && !IsTls; + if (IsNonTlsNoBits) + Rank |= RF_NON_TLS_BSS; // We place nobits RelRo sections before plain r/w ones, and non-nobits RelRo // sections after r/w ones, so that the RelRo sections are contiguous. - bool AIsRelRo = isRelroSection(A); - bool BIsRelRo = isRelroSection(B); - if (AIsRelRo != BIsRelRo) - return AIsNonTlsNoBits ? AIsRelRo : BIsRelRo; + bool IsRelRo = isRelroSection(Sec); + if (IsNonTlsNoBits && !IsRelRo) + Rank |= RF_NON_TLS_BSS_RO; + if (!IsNonTlsNoBits && IsRelRo) + Rank |= RF_NON_TLS_BSS_RO; // The TLS initialization block needs to be a single contiguous block in a R/W // PT_LOAD, so stick TLS sections directly before the other RelRo R/W // sections. The TLS NOBITS sections are placed here as they don't take up // virtual address space in the PT_LOAD. - if (AIsTls != BIsTls) - return AIsTls; + if (!IsTls) + Rank |= RF_NOT_TLS; // Within the TLS initialization block, the non-nobits sections need to appear // first. - if (AIsNoBits != BIsNoBits) - return BIsNoBits; + if (IsNoBits) + Rank |= RF_BSS; - // Some architectures have additional ordering restrictions for sections - // within the same PT_LOAD. - if (Config->EMachine == EM_PPC64) - return getPPC64SectionRank(A->Name) < getPPC64SectionRank(B->Name); - if (Config->EMachine == EM_MIPS) - return getMipsSectionRank(A) < getMipsSectionRank(B); + // // Some architectures have additional ordering restrictions for sections + // // within the same PT_LOAD. + if (Config->EMachine == EM_PPC64) { + // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections + // that we would like to make sure appear is a specific order to maximize + // their coverage by a single signed 16-bit offset from the TOC base + // pointer. Conversely, the special .tocbss section should be first among + // all SHT_NOBITS sections. This will put it next to the loaded special + // PPC64 sections (and, thus, within reach of the TOC base pointer). + StringRef Name = Sec->Name; + if (Name != ".tocbss") + Rank |= RF_PPC_NOT_TOCBSS; + if (Name == ".opd") + Rank |= RF_PPC_OPD; + + if (Name == ".toc1") + Rank |= RF_PPC_TOCL; + + if (Name == ".toc") + Rank |= RF_PPC_TOC; + + if (Name == ".branch_lt") + Rank |= RF_PPC_BRANCH_LT; + } + if (Config->EMachine == EM_MIPS) { + // All sections with SHF_MIPS_GPREL flag should be grouped together + // because data in these sections is addressable with a gp relative address. + if (Sec->Flags & SHF_MIPS_GPREL) + Rank |= RF_MIPS_GPREL; + + if (Sec->Name != ".got") + Rank |= RF_MIPS_NOT_GOT; + } + + return Rank; +} + +static bool compareSectionsNonScript(const OutputSection *A, + const OutputSection *B) { + if (A->SortRank != B->SortRank) + return A->SortRank < B->SortRank; + if (!(A->SortRank & RF_NOT_ADDR_SET)) + return Config->SectionStartMap.lookup(A->Name) < + Config->SectionStartMap.lookup(B->Name); return false; } @@ -751,8 +771,7 @@ static bool compareSections(const OutputSection *A, const OutputSection *B) { if (AIndex != BIndex) return AIndex < BIndex; - // The sections are not in the linker script, so don't sort for now. - return false; + return compareSectionsNonScript(A, B); } // Program header entry @@ -962,45 +981,36 @@ template void Writer::createSections() { Sec->assignOffsets(); } -static bool canSharePtLoad(const OutputSection &S1, const OutputSection &S2) { - if (!(S1.Flags & SHF_ALLOC) || !(S2.Flags & SHF_ALLOC)) - return false; - - bool S1IsWrite = S1.Flags & SHF_WRITE; - bool S2IsWrite = S2.Flags & SHF_WRITE; - if (S1IsWrite != S2IsWrite) - return false; - - if (!S1IsWrite) - return true; // RO and RX share a PT_LOAD with linker scripts. - return (S1.Flags & SHF_EXECINSTR) == (S2.Flags & SHF_EXECINSTR); +// We want to find how similar two ranks are. +// The more branches in getSectionRank that match, the more similar they are. +// Since each branch corresponds to a bit flag, we can just use +// countLeadingZeros. +static unsigned getRankProximity(OutputSection *A, OutputSection *B) { + return countLeadingZeros(A->SortRank ^ B->SortRank); } -// We assume, like createPhdrs that all allocs are at the start. +// We want to place orphan sections so that they share as much +// characteristics with their neighbors as possible. For example, if +// both are rw, or both are tls. template static std::vector::iterator findOrphanPos(std::vector::iterator B, std::vector::iterator E) { OutputSection *Sec = *E; - // If it is not allocatable, just leave it at the end. - if (!(Sec->Flags & SHF_ALLOC)) + // Find the first element that has as close a rank as possible. + auto I = std::max_element(B, E, [=](OutputSection *A, OutputSection *B) { + return getRankProximity(Sec, A) < getRankProximity(Sec, B); + }); + if (I == E) return E; - // Find the first sharable. - auto Pos = std::find_if( - B, E, [=](OutputSection *S) { return canSharePtLoad(*S, *Sec); }); - if (Pos != E) { - // Ony consider the sharable range. - B = Pos; - E = std::find_if( - B, E, [=](OutputSection *S) { return !canSharePtLoad(*S, *Sec); }); - assert(B != E); - } - - // Find the fist position that Sec compares less to. - return std::find_if( - B, E, [=](OutputSection *S) { return compareSectionsNonScript(Sec, S); }); + // Consider all existing sections with the same proximity. + unsigned Proximity = getRankProximity(Sec, *I); + while (I != E && getRankProximity(Sec, *I) == Proximity && + Sec->SortRank >= (*I)->SortRank) + ++I; + return I; } template void Writer::sortSections() { @@ -1008,12 +1018,18 @@ template void Writer::sortSections() { // relative order for SHF_LINK_ORDER sections. if (Config->Relocatable) return; + + if (Script->Opt.HasSections) + Script->adjustSectionsBeforeSorting(); + + for (OutputSection *Sec : OutputSections) + Sec->SortRank = getSectionRank(Sec); + if (!Script->Opt.HasSections) { std::stable_sort(OutputSections.begin(), OutputSections.end(), compareSectionsNonScript); return; } - Script->adjustSectionsBeforeSorting(); // The order of the sections in the script is arbitrary and may not agree with // compareSectionsNonScript. This means that we cannot easily define a @@ -1046,8 +1062,18 @@ template void Writer::sortSections() { auto NonScriptI = std::find_if(OutputSections.begin(), E, [](OutputSection *S) { return S->SectionIndex == INT_MAX; }); - for (; NonScriptI != E; ++NonScriptI) - std::rotate(findOrphanPos(I, NonScriptI), NonScriptI, NonScriptI + 1); + while (NonScriptI != E) { + auto Pos = findOrphanPos(I, NonScriptI); + + // As an optimization, find all sections with the same sort rank + // and insert them with one rotate. + unsigned Rank = (*NonScriptI)->SortRank; + auto End = std::find_if(NonScriptI + 1, E, [=](OutputSection *Sec) { + return Sec->SortRank != Rank; + }); + std::rotate(Pos, NonScriptI, End); + NonScriptI = End; + } Script->adjustSectionsAfterSorting(); } diff --git a/lld/test/ELF/linkerscript/sections-constraint.s b/lld/test/ELF/linkerscript/sections-constraint.s index 4d95ec18336c..796240627170 100644 --- a/lld/test/ELF/linkerscript/sections-constraint.s +++ b/lld/test/ELF/linkerscript/sections-constraint.s @@ -24,8 +24,8 @@ # NO1-NEXT: 0 00000000 # NO1: .writable 00000004 # NO1: .foo.2 00000004 -# NO1: .readable 00000004 # NO1: .foo.1 00000004 +# NO1: .readable 00000004 .global _start _start: diff --git a/lld/test/ELF/linkerscript/sections.s b/lld/test/ELF/linkerscript/sections.s index 69c6f19d078d..d5645c303754 100644 --- a/lld/test/ELF/linkerscript/sections.s +++ b/lld/test/ELF/linkerscript/sections.s @@ -45,8 +45,9 @@ # SEC-ORDER: 3 .shstrtab 0000003b {{[0-9a-f]*}} # SEC-ORDER: 4 .symtab 00000030 {{[0-9a-f]*}} # SEC-ORDER: 5 .strtab 00000008 {{[0-9a-f]*}} -# SEC-ORDER: 6 .data 00000020 {{[0-9a-f]*}} DATA -# SEC-ORDER: 7 .text 0000000e {{[0-9a-f]*}} TEXT DATA +# SEC-ORDER: 6 .comment 00000008 {{[0-9a-f]*}} +# SEC-ORDER: 7 .data 00000020 {{[0-9a-f]*}} DATA +# SEC-ORDER: 8 .text 0000000e {{[0-9a-f]*}} TEXT DATA # .text and .data have swapped names but proper sizes and types. # RUN: echo "SECTIONS { \ diff --git a/lld/test/ELF/many-alloc-sections.s b/lld/test/ELF/many-alloc-sections.s new file mode 100644 index 000000000000..d73f95d97c9f --- /dev/null +++ b/lld/test/ELF/many-alloc-sections.s @@ -0,0 +1,105 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o +// RUN: echo "SECTIONS { . = SIZEOF_HEADERS; .text : { *(.text) } }" > %t.script +// RUN: ld.lld -T %t.script %t.o -o %t +// RUN: llvm-readobj -t %t | FileCheck %s + +// Test that _start is in the correct section. +// CHECK: Name: _start +// CHECK-NEXT: Value: 0x120 +// CHECK-NEXT: Size: 0 +// CHECK-NEXT: Binding: Global +// CHECK-NEXT: Type: None +// CHECK-NEXT: Other: 0 +// CHECK-NEXT: Section: dm + +.macro gen_sections4 x + .section a\x,"a" + .section b\x,"a" + .section c\x,"a" + .section d\x,"a" +.endm + +.macro gen_sections8 x + gen_sections4 a\x + gen_sections4 b\x +.endm + +.macro gen_sections16 x + gen_sections8 a\x + gen_sections8 b\x +.endm + +.macro gen_sections32 x + gen_sections16 a\x + gen_sections16 b\x +.endm + +.macro gen_sections64 x + gen_sections32 a\x + gen_sections32 b\x +.endm + +.macro gen_sections128 x + gen_sections64 a\x + gen_sections64 b\x +.endm + +.macro gen_sections256 x + gen_sections128 a\x + gen_sections128 b\x +.endm + +.macro gen_sections512 x + gen_sections256 a\x + gen_sections256 b\x +.endm + +.macro gen_sections1024 x + gen_sections512 a\x + gen_sections512 b\x +.endm + +.macro gen_sections2048 x + gen_sections1024 a\x + gen_sections1024 b\x +.endm + +.macro gen_sections4096 x + gen_sections2048 a\x + gen_sections2048 b\x +.endm + +.macro gen_sections8192 x + gen_sections4096 a\x + gen_sections4096 b\x +.endm + +.macro gen_sections16384 x + gen_sections8192 a\x + gen_sections8192 b\x +.endm + +.macro gen_sections32768 x + gen_sections16384 a\x + gen_sections16384 b\x +.endm + + .bss + .section bar + +gen_sections32768 a +gen_sections16384 b +gen_sections8192 c +gen_sections4096 d +gen_sections2048 e +gen_sections1024 f +gen_sections512 g +gen_sections128 h +gen_sections64 i +gen_sections32 j +gen_sections16 k +gen_sections8 l +gen_sections4 m + +.global _start +_start: