ELF: Implement --icf=safe using address-significance tables.

Differential Revision: https://reviews.llvm.org/D48146

llvm-svn: 337429
This commit is contained in:
Peter Collingbourne 2018-07-18 22:49:31 +00:00
parent 4a653fa7f1
commit a327a4c34e
9 changed files with 233 additions and 11 deletions

View File

@ -40,6 +40,9 @@ enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid };
// For --discard-{all,locals,none}.
enum class DiscardPolicy { Default, All, Locals, None };
// For --icf={none,safe,all}.
enum class ICFLevel { None, Safe, All };
// For --strip-{all,debug}.
enum class StripPolicy { None, All, Debug };
@ -138,7 +141,6 @@ struct Configuration {
bool GnuUnique;
bool HasDynamicList = false;
bool HasDynSymTab;
bool ICF;
bool IgnoreDataAddressEquality;
bool IgnoreFunctionAddressEquality;
bool LTODebugPassManager;
@ -187,6 +189,7 @@ struct Configuration {
bool ZRetpolineplt;
bool ZWxneeded;
DiscardPolicy Discard;
ICFLevel ICF;
OrphanHandlingPolicy OrphanHandling;
SortSectionPolicy SortSection;
StripPolicy Strip;

View File

@ -51,6 +51,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
@ -296,7 +297,7 @@ static void checkOptions(opt::InputArgList &Args) {
error("-r and --gc-sections may not be used together");
if (Config->GdbIndex)
error("-r and --gdb-index may not be used together");
if (Config->ICF)
if (Config->ICF != ICFLevel::None)
error("-r and --icf may not be used together");
if (Config->Pie)
error("-r and -pie may not be used together");
@ -519,6 +520,15 @@ static StringRef getDynamicLinker(opt::InputArgList &Args) {
return Arg->getValue();
}
static ICFLevel getICF(opt::InputArgList &Args) {
auto *Arg = Args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_all);
if (!Arg || Arg->getOption().getID() == OPT_icf_none)
return ICFLevel::None;
if (Arg->getOption().getID() == OPT_icf_safe)
return ICFLevel::Safe;
return ICFLevel::All;
}
static StripPolicy getStrip(opt::InputArgList &Args) {
if (Args.hasArg(OPT_relocatable))
return StripPolicy::None;
@ -745,7 +755,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->GcSections = Args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
Config->GnuUnique = Args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
Config->GdbIndex = Args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
Config->ICF = Args.hasFlag(OPT_icf_all, OPT_icf_none, false);
Config->ICF = getICF(Args);
Config->IgnoreDataAddressEquality =
Args.hasArg(OPT_ignore_data_address_equality);
Config->IgnoreFunctionAddressEquality =
@ -1225,16 +1235,60 @@ template <class ELFT> static void demoteSymbols() {
}
}
static bool keepUnique(Symbol *S) {
if (auto *D = dyn_cast_or_null<Defined>(S)) {
if (D->Section) {
D->Section->KeepUnique = true;
return true;
}
}
return false;
}
// Record sections that define symbols mentioned in --keep-unique <symbol>
// these sections are inelligible for ICF.
// and symbols referred to by address-significance tables. These sections are
// ineligible for ICF.
template <class ELFT>
static void findKeepUniqueSections(opt::InputArgList &Args) {
for (auto *Arg : Args.filtered(OPT_keep_unique)) {
StringRef Name = Arg->getValue();
if (auto *Sym = dyn_cast_or_null<Defined>(Symtab->find(Name)))
Sym->Section->KeepUnique = true;
else
if (!keepUnique(Symtab->find(Name)))
warn("could not find symbol " + Name + " to keep unique");
}
if (Config->ICF == ICFLevel::Safe) {
// Symbols in the dynsym could be address-significant in other executables
// or DSOs, so we conservatively mark them as address-significant.
for (Symbol *S : Symtab->getSymbols())
if (S->includeInDynsym())
keepUnique(S);
// Visit the address-significance table in each object file and mark each
// referenced symbol as address-significant.
for (InputFile *F : ObjectFiles) {
auto *Obj = cast<ObjFile<ELFT>>(F);
ArrayRef<Symbol *> Syms = Obj->getSymbols();
if (Obj->AddrsigSec) {
ArrayRef<uint8_t> Contents =
check(Obj->getObj().getSectionContents(Obj->AddrsigSec));
const uint8_t *Cur = Contents.begin();
while (Cur != Contents.end()) {
unsigned Size;
const char *Err;
uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err);
if (Err)
fatal(toString(F) + ": could not decode addrsig section: " + Err);
keepUnique(Syms[SymIndex]);
Cur += Size;
}
} else {
// If an object file does not have an address-significance table,
// conservatively mark all of its symbols as address-significant.
for (Symbol *S : Syms)
keepUnique(S);
}
}
}
}
// Do actual linking. Note that when this function is called,
@ -1409,8 +1463,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
markLive<ELFT>();
demoteSymbols<ELFT>();
mergeSections();
if (Config->ICF) {
findKeepUniqueSections(Args);
if (Config->ICF != ICFLevel::None) {
findKeepUniqueSections<ELFT>(Args);
doIcf<ELFT>();
}

View File

@ -173,8 +173,9 @@ static bool isEligible(InputSection *S) {
return false;
// Don't merge read only data sections unless
// --ignore-data-address-equality was passed.
if (!(S->Flags & SHF_EXECINSTR) && !Config->IgnoreDataAddressEquality)
// --ignore-data-address-equality or --icf=safe was passed.
if (!(S->Flags & SHF_EXECINSTR) &&
!(Config->IgnoreDataAddressEquality || Config->ICF == ICFLevel::Safe))
return false;
// Don't merge synthetic sections as their Data member is not valid and empty.

View File

@ -420,6 +420,17 @@ void ObjFile<ELFT>::initializeSections(
// if -r is given, we'll let the final link discard such sections.
// This is compatible with GNU.
if ((Sec.sh_flags & SHF_EXCLUDE) && !Config->Relocatable) {
if (Sec.sh_type == SHT_LLVM_ADDRSIG) {
// We ignore the address-significance table if we know that the object
// file was created by objcopy or ld -r. This is because these tools
// will reorder the symbols in the symbol table, invalidating the data
// in the address-significance table, which refers to symbols by index.
if (Sec.sh_link != 0)
this->AddrsigSec = &Sec;
else if (Config->ICF == ICFLevel::Safe)
warn(toString(this) + ": --icf=safe is incompatible with object "
"files created using objcopy or ld -r");
}
this->Sections[I] = &InputSection::Discarded;
continue;
}

View File

@ -215,6 +215,9 @@ public:
// but had one or more functions with the no_split_stack attribute.
bool SomeNoSplitStack = false;
// Pointer to this input file's .llvm_addrsig section, if it has one.
const Elf_Shdr *AddrsigSec = nullptr;
private:
void
initializeSections(llvm::DenseSet<llvm::CachedHashStringRef> &ComdatGroups);

View File

@ -170,6 +170,8 @@ def help: F<"help">, HelpText<"Print option help">;
def icf_all: F<"icf=all">, HelpText<"Enable identical code folding">;
def icf_safe: F<"icf=safe">, HelpText<"Enable safe identical code folding">;
def icf_none: F<"icf=none">, HelpText<"Disable identical code folding (default)">;
def ignore_function_address_equality: F<"ignore-function-address-equality">,

View File

@ -187,6 +187,8 @@ is the default.
Print a help message.
.It Fl -icf Ns = Ns Cm all
Enable identical code folding.
.It Fl -icf Ns = Ns Cm safe
Enable safe identical code folding.
.It Fl -icf Ns = Ns Cm none
Disable identical code folding.
.It Fl -image-base Ns = Ns Ar value

View File

@ -0,0 +1,9 @@
.section .text.non_addrsig1,"ax",@progbits
.globl non_addrsig1
non_addrsig1:
ret
.section .text.non_addrsig2,"ax",@progbits
.globl non_addrsig2
non_addrsig2:
ret

137
lld/test/ELF/icf-safe.s Normal file
View File

@ -0,0 +1,137 @@
# REQUIRES: x86
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t1.o
# RUN: llvm-objcopy %t1.o %t1copy.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %S/Inputs/icf-safe.s -o %t2.o
# RUN: ld.lld %t1.o %t2.o -o %t2 --icf=safe --print-icf-sections | FileCheck %s
# RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections -shared | FileCheck --check-prefix=EXPORT %s
# RUN: ld.lld %t1.o %t2.o -o %t3 --icf=safe --print-icf-sections --export-dynamic | FileCheck --check-prefix=EXPORT %s
# RUN: ld.lld %t1copy.o -o %t4 --icf=safe 2>&1 | FileCheck --check-prefix=OBJCOPY %s
# CHECK-NOT: selected section {{.*}}:(.rodata.l1)
# CHECK: selected section {{.*}}:(.rodata.l3)
# CHECK: removing identical section {{.*}}:(.rodata.l4)
# CHECK-NOT: selected section {{.*}}:(.text.f1)
# CHECK: selected section {{.*}}:(.text.f3)
# CHECK: removing identical section {{.*}}:(.text.f4)
# CHECK-NOT: selected section {{.*}}:(.rodata.h1)
# CHECK: selected section {{.*}}:(.rodata.h3)
# CHECK: removing identical section {{.*}}:(.rodata.h4)
# CHECK-NOT: selected section {{.*}}:(.rodata.g1)
# CHECK: selected section {{.*}}:(.rodata.g3)
# CHECK: removing identical section {{.*}}:(.rodata.g4)
# CHECK-NOT: selected section {{.*}}:(.text.non_addrsig{{.}})
# llvm-mc normally emits an empty .text section into every object file. Since
# nothing actually refers to it via a relocation, it doesn't have any associated
# symbols (thus nor can anything refer to it via a relocation, making it safe to
# merge with the empty section in the other input file). Here we check that the
# only two sections merged are the two empty sections and the sections with only
# STB_LOCAL or STV_HIDDEN symbols. The dynsym entries should have prevented
# anything else from being merged.
# EXPORT-NOT: selected section
# EXPORT: selected section {{.*}}:(.rodata.l3)
# EXPORT: removing identical section {{.*}}:(.rodata.l4)
# EXPORT-NOT: selected section
# EXPORT: selected section {{.*}}:(.rodata.h3)
# EXPORT: removing identical section {{.*}}:(.rodata.h4)
# EXPORT-NOT: selected section
# EXPORT: selected section {{.*}}:(.text)
# EXPORT: removing identical section {{.*}}:(.text)
# EXPORT-NOT: selected section
# OBJCOPY: --icf=safe is incompatible with object files created using objcopy or ld -r
.section .text.f1,"ax",@progbits
.globl f1
f1:
ret
.section .text.f2,"ax",@progbits
.globl f2
f2:
ret
.section .text.f3,"ax",@progbits
.globl f3
f3:
ud2
.section .text.f4,"ax",@progbits
.globl f4
f4:
ud2
.section .rodata.g1,"a",@progbits
.globl g1
g1:
.byte 1
.section .rodata.g2,"a",@progbits
.globl g2
g2:
.byte 1
.section .rodata.g3,"a",@progbits
.globl g3
g3:
.byte 2
.section .rodata.g4,"a",@progbits
.globl g4
g4:
.byte 2
.section .rodata.l1,"a",@progbits
l1:
.byte 3
.section .rodata.l2,"a",@progbits
l2:
.byte 3
.section .rodata.l3,"a",@progbits
l3:
.byte 4
.section .rodata.l4,"a",@progbits
l4:
.byte 4
.section .rodata.h1,"a",@progbits
.globl h1
.hidden h1
h1:
.byte 5
.section .rodata.h2,"a",@progbits
.globl h2
.hidden h2
h2:
.byte 5
.section .rodata.h3,"a",@progbits
.globl h3
.hidden h3
h3:
.byte 6
.section .rodata.h4,"a",@progbits
.globl h4
.hidden h4
h4:
.byte 6
.addrsig
.addrsig_sym f1
.addrsig_sym f2
.addrsig_sym g1
.addrsig_sym g2
.addrsig_sym l1
.addrsig_sym l2
.addrsig_sym h1
.addrsig_sym h2