Speed up --start-lib and --end-lib.

--{start,end}-lib give files grouped by the options the archive file
semantics. That is, each object file between them acts as if it were
in an archive file whose sole member is the file.

Therefore, files between --{start,end}-lib are linked to the final
output only if they are needed to resolve some undefined symbols.

Previously, the feature was implemented this way:

 1. We read a symbol table and insert defined symbols to the symbol
    table as lazy symbols.

 2. If an undefind symbol is resolved to a lazy symbol, that lazy
    symbol instantiate ObjFile class for that symbol, which re-insert
    all defined symbols to the symbol table.

So, if an ObjFile is instantiated, defined symbols are inserted to the
symbol table twice. Since inserting long symbol names is not cheap,
there's a room to optimize here.

This patch optimzies it. Now, LazyObjFile remembers symbol handles and
passed them over to a new ObjFile instance, so that the ObjFile
doesn't insert the same strings.

Here is a quick benchmark to link clang. "Original" is the original
lld with unmodified command line options. For "Case 1" and "Case 2", I
extracted all files from archive files and replace .a's in a command
line with .o's wrapped with --{start,end}-lib. I used the original lld
for Case 1" and use this patch for Case 2.

  Original: 5.892
    Case 1: 6.001 (+1.8%)
    Case 2: 5.701 (-3.2%)

So, interestingly, --{start,end}-lib are now faster than the regular
linking scheme with archive files. That's perhaps not too surprising,
though, because for regular archive files, we look up the symbol table
with the same string twice.

Differential Revision: https://reviews.llvm.org/D62188

llvm-svn: 361473
This commit is contained in:
Rui Ueyama 2019-05-23 09:53:30 +00:00
parent 77b4f0abb8
commit 4254840313
3 changed files with 102 additions and 53 deletions

View File

@ -913,62 +913,91 @@ StringRef ObjFile<ELFT>::getSectionName(const Elf_Shdr &Sec) {
return CHECK(getObj().getSectionName(&Sec, SectionStringTable), this);
}
// Initialize this->Symbols. this->Symbols is a parallel array as
// its corresponding ELF symbol table.
template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
this->Symbols.reserve(this->getELFSyms<ELFT>().size());
for (const Elf_Sym &Sym : this->getELFSyms<ELFT>())
this->Symbols.push_back(createSymbol(&Sym));
}
ArrayRef<Elf_Sym> ESyms = this->getELFSyms<ELFT>();
this->Symbols.resize(ESyms.size());
template <class ELFT> Symbol *ObjFile<ELFT>::createSymbol(const Elf_Sym *Sym) {
uint32_t SecIdx = getSectionIndex(*Sym);
if (SecIdx >= this->Sections.size())
fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
// Our symbol table may have already been partially initialized
// because of LazyObjFile.
for (size_t I = 0, End = ESyms.size(); I != End; ++I)
if (!this->Symbols[I] && ESyms[I].getBinding() != STB_LOCAL)
this->Symbols[I] =
Symtab->insert(CHECK(ESyms[I].getName(this->StringTable), this));
InputSectionBase *Sec = this->Sections[SecIdx];
uint8_t Binding = Sym->getBinding();
uint8_t StOther = Sym->st_other;
uint8_t Type = Sym->getType();
uint64_t Value = Sym->st_value;
uint64_t Size = Sym->st_size;
// Fill this->Symbols. A symbol is either local or global.
for (size_t I = 0, End = ESyms.size(); I != End; ++I) {
const Elf_Sym &ESym = ESyms[I];
if (Binding == STB_LOCAL) {
if (Sym->getType() == STT_FILE)
SourceFile = CHECK(Sym->getName(this->StringTable), this);
// Read symbol attributes.
uint32_t SecIdx = getSectionIndex(ESym);
if (SecIdx >= this->Sections.size())
fatal(toString(this) + ": invalid section index: " + Twine(SecIdx));
if (this->StringTable.size() <= Sym->st_name)
fatal(toString(this) + ": invalid symbol name offset");
InputSectionBase *Sec = this->Sections[SecIdx];
uint8_t Binding = ESym.getBinding();
uint8_t StOther = ESym.st_other;
uint8_t Type = ESym.getType();
uint64_t Value = ESym.st_value;
uint64_t Size = ESym.st_size;
StringRefZ Name = this->StringTable.data() + ESym.st_name;
StringRefZ Name = this->StringTable.data() + Sym->st_name;
if (Sym->st_shndx == SHN_UNDEF)
return make<Undefined>(this, Name, Binding, StOther, Type);
return make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
}
// Handle local symbols. Local symbols are not added to the symbol
// table because they are not visible from other object files. We
// allocate symbol instances and add their pointers to Symbols.
if (Binding == STB_LOCAL) {
if (ESym.getType() == STT_FILE)
SourceFile = CHECK(ESym.getName(this->StringTable), this);
StringRef Name = CHECK(Sym->getName(this->StringTable), this);
if (this->StringTable.size() <= ESym.st_name)
fatal(toString(this) + ": invalid symbol name offset");
if (Sym->st_shndx == SHN_UNDEF)
return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type});
if (ESym.st_shndx == SHN_UNDEF)
this->Symbols[I] = make<Undefined>(this, Name, Binding, StOther, Type);
else
this->Symbols[I] =
make<Defined>(this, Name, Binding, StOther, Type, Value, Size, Sec);
continue;
}
if (Sec == &InputSection::Discarded)
return Symtab->addSymbol(Undefined{this, Name, Binding, StOther, Type,
/*DiscardedSecIdx=*/SecIdx});
// Handle global undefined symbols.
if (ESym.st_shndx == SHN_UNDEF) {
resolveSymbol(this->Symbols[I],
Undefined{this, Name, Binding, StOther, Type});
continue;
}
if (Sym->st_shndx == SHN_COMMON) {
if (Value == 0 || Value >= UINT32_MAX)
fatal(toString(this) + ": common symbol '" + Name +
"' has invalid alignment: " + Twine(Value));
return Symtab->addSymbol(
CommonSymbol{this, Name, Binding, StOther, Type, Value, Size});
}
// Handle global common symbols.
if (ESym.st_shndx == SHN_COMMON) {
if (Value == 0 || Value >= UINT32_MAX)
fatal(toString(this) + ": common symbol '" + StringRef(Name.Data) +
"' has invalid alignment: " + Twine(Value));
resolveSymbol(this->Symbols[I], CommonSymbol{this, Name, Binding, StOther,
Type, Value, Size});
continue;
}
// If a defined symbol is in a discarded section, handle it as if it
// were an undefined symbol. Such symbol doesn't comply with the
// standard, but in practice, a .eh_frame often directly refer
// COMDAT member sections, and if a comdat group is discarded, some
// defined symbol in a .eh_frame becomes dangling symbols.
if (Sec == &InputSection::Discarded) {
resolveSymbol(this->Symbols[I],
Undefined{this, Name, Binding, StOther, Type, SecIdx});
continue;
}
// Handle global defined symbols.
if (Binding == STB_GLOBAL || Binding == STB_WEAK ||
Binding == STB_GNU_UNIQUE) {
resolveSymbol(this->Symbols[I], Defined{this, Name, Binding, StOther,
Type, Value, Size, Sec});
continue;
}
switch (Binding) {
default:
fatal(toString(this) + ": unexpected binding: " + Twine((int)Binding));
case STB_GLOBAL:
case STB_WEAK:
case STB_GNU_UNIQUE:
return Symtab->addSymbol(
Defined{this, Name, Binding, StOther, Type, Value, Size, Sec});
}
}
@ -1455,10 +1484,16 @@ InputFile *LazyObjFile::fetch() {
InputFile *File = createObjectFile(MBRef, ArchiveName, OffsetInArchive);
File->GroupId = GroupId;
// Copy symbol vector so that the new InputFile doesn't have to
// insert the same defined symbols to the symbol table again.
File->Symbols = std::move(Symbols);
return File;
}
template <class ELFT> void LazyObjFile::parse() {
using Elf_Sym = typename ELFT::Sym;
// A lazy object file wraps either a bitcode file or an ELF file.
if (isBitcode(this->MB)) {
std::unique_ptr<lto::InputFile> Obj =
@ -1476,6 +1511,7 @@ template <class ELFT> void LazyObjFile::parse() {
return;
}
// Find a symbol table.
ELFFile<ELFT> Obj = check(ELFFile<ELFT>::create(MB.getBuffer()));
ArrayRef<typename ELFT::Shdr> Sections = CHECK(Obj.sections(), this);
@ -1483,16 +1519,28 @@ template <class ELFT> void LazyObjFile::parse() {
if (Sec.sh_type != SHT_SYMTAB)
continue;
typename ELFT::SymRange Syms = CHECK(Obj.symbols(&Sec), this);
// A symbol table is found.
ArrayRef<Elf_Sym> ESyms = CHECK(Obj.symbols(&Sec), this);
uint32_t FirstGlobal = Sec.sh_info;
StringRef StringTable =
CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
StringRef Strtab = CHECK(Obj.getStringTableForSymtab(Sec, Sections), this);
this->Symbols.resize(ESyms.size());
for (const typename ELFT::Sym &Sym : Syms.slice(FirstGlobal)) {
if (Sym.st_shndx == SHN_UNDEF)
// Get existing symbols or insert placeholder symbols.
for (size_t I = FirstGlobal, End = ESyms.size(); I != End; ++I)
if (ESyms[I].st_shndx != SHN_UNDEF)
this->Symbols[I] = Symtab->insert(CHECK(ESyms[I].getName(Strtab), this));
// Replace existing symbols with LazyObject symbols.
//
// resolveSymbol() may trigger this->fetch() if an existing symbol
// is an undefined symbol. If that happens, this LazyObjFile has
// served its purpose, and we can exit from the loop early.
for (Symbol *Sym : this->Symbols) {
if (!Sym)
continue;
Symtab->addSymbol(
LazyObject{*this, CHECK(Sym.getName(StringTable), this)});
resolveSymbol(Sym, LazyObject{*this, Sym->getName()});
if (AddedToLink)
return;
}
return;
}

View File

@ -139,10 +139,11 @@ public:
// Index of MIPS GOT built for this file.
llvm::Optional<size_t> MipsGotIndex;
std::vector<Symbol *> Symbols;
protected:
InputFile(Kind K, MemoryBufferRef M);
std::vector<InputSectionBase *> Sections;
std::vector<Symbol *> Symbols;
private:
const Kind FileKind;
@ -255,7 +256,6 @@ private:
StringRef getSectionName(const Elf_Shdr &Sec);
bool shouldMerge(const Elf_Shdr &Sec);
Symbol *createSymbol(const Elf_Sym *Sym);
// Each ELF symbol contains a section index which the symbol belongs to.
// However, because the number of bits dedicated for that is limited, a

View File

@ -92,6 +92,7 @@ Symbol *SymbolTable::insert(StringRef Name) {
Symbol *Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>());
SymVector.push_back(Sym);
Sym->setName(Name);
Sym->SymbolKind = Symbol::PlaceholderKind;
Sym->VersionId = Config->DefaultSymbolVersion;
Sym->Visibility = STV_DEFAULT;