Introduce StringRefZ class to represent null-terminated strings.

StringRefZ is a class that represents a null-terminated string. The
string length is computed lazily, so it is more efficient than StringRef
for representing strings in a string table.

The motivation for defining this new class is to merge functions
that differ only in their string type; we have many constructors that take
`const char *` or `StringRef`. With StringRefZ, we can merge them.

Differential Revision: https://reviews.llvm.org/D27037

llvm-svn: 288172
This commit is contained in:
Rui Ueyama 2016-11-29 18:05:04 +00:00
parent c62b64a9e8
commit a13efc2a73
8 changed files with 95 additions and 84 deletions

View File

@ -438,6 +438,11 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
int Binding = Sym->getBinding(); int Binding = Sym->getBinding();
InputSectionBase<ELFT> *Sec = getSection(*Sym); InputSectionBase<ELFT> *Sec = getSection(*Sym);
uint8_t StOther = Sym->st_other;
uint8_t Type = Sym->getType();
uintX_t Value = Sym->st_value;
uintX_t Size = Sym->st_size;
if (Binding == STB_LOCAL) { if (Binding == STB_LOCAL) {
if (Sym->getType() == STT_FILE) if (Sym->getType() == STT_FILE)
SourceFile = check(Sym->getName(this->StringTable)); SourceFile = check(Sym->getName(this->StringTable));
@ -447,20 +452,19 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
const char *Name = this->StringTable.data() + Sym->st_name; const char *Name = this->StringTable.data() + Sym->st_name;
if (Sym->st_shndx == SHN_UNDEF) if (Sym->st_shndx == SHN_UNDEF)
return new (BAlloc) Undefined(Name, Sym->st_other, Sym->getType(), this); return new (BAlloc)
return new (BAlloc) DefinedRegular<ELFT>(Name, *Sym, Sec); Undefined(Name, /*IsLocal=*/true, StOther, Type, this);
return new (BAlloc) DefinedRegular<ELFT>(Name, /*IsLocal=*/true, StOther,
Type, Value, Size, Sec, this);
} }
StringRef Name = check(Sym->getName(this->StringTable)); StringRef Name = check(Sym->getName(this->StringTable));
uint8_t StOther = Sym->st_other;
uint8_t Type = Sym->getType();
uintX_t Value = Sym->st_value;
uintX_t Size = Sym->st_size;
switch (Sym->st_shndx) { switch (Sym->st_shndx) {
case SHN_UNDEF: case SHN_UNDEF:
return elf::Symtab<ELFT>::X return elf::Symtab<ELFT>::X
->addUndefined(Name, Binding, StOther, Type, ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
/*CanOmitFromDynSym=*/false, this) /*CanOmitFromDynSym=*/false, this)
->body(); ->body();
case SHN_COMMON: case SHN_COMMON:
@ -480,7 +484,7 @@ SymbolBody *elf::ObjectFile<ELFT>::createSymbolBody(const Elf_Sym *Sym) {
case STB_GNU_UNIQUE: case STB_GNU_UNIQUE:
if (Sec == &InputSection<ELFT>::Discarded) if (Sec == &InputSection<ELFT>::Discarded)
return elf::Symtab<ELFT>::X return elf::Symtab<ELFT>::X
->addUndefined(Name, Binding, StOther, Type, ->addUndefined(Name, /*IsLocal=*/false, Binding, StOther, Type,
/*CanOmitFromDynSym=*/false, this) /*CanOmitFromDynSym=*/false, this)
->body(); ->body();
return elf::Symtab<ELFT>::X return elf::Symtab<ELFT>::X
@ -723,12 +727,14 @@ static Symbol *createBitcodeSymbol(const std::vector<bool> &KeptComdats,
int C = check(ObjSym.getComdatIndex()); int C = check(ObjSym.getComdatIndex());
if (C != -1 && !KeptComdats[C]) if (C != -1 && !KeptComdats[C])
return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
CanOmitFromDynSym, F); Visibility, Type, CanOmitFromDynSym,
F);
if (Flags & BasicSymbolRef::SF_Undefined) if (Flags & BasicSymbolRef::SF_Undefined)
return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type, return Symtab<ELFT>::X->addUndefined(NameRef, /*IsLocal=*/false, Binding,
CanOmitFromDynSym, F); Visibility, Type, CanOmitFromDynSym,
F);
if (Flags & BasicSymbolRef::SF_Common) if (Flags & BasicSymbolRef::SF_Common)
return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(), return Symtab<ELFT>::X->addCommon(NameRef, ObjSym.getCommonSize(),

View File

@ -97,8 +97,8 @@ BitcodeCompiler::BitcodeCompiler() : LTOObj(createLTO()) {}
BitcodeCompiler::~BitcodeCompiler() = default; BitcodeCompiler::~BitcodeCompiler() = default;
static void undefine(Symbol *S) { static void undefine(Symbol *S) {
replaceBody<Undefined>(S, S->body()->getName(), STV_DEFAULT, S->body()->Type, replaceBody<Undefined>(S, S->body()->getName(), /*IsLocal=*/false,
nullptr); STV_DEFAULT, S->body()->Type, nullptr);
} }
void BitcodeCompiler::add(BitcodeFile &F) { void BitcodeCompiler::add(BitcodeFile &F) {

View File

@ -16,6 +16,7 @@
#include "llvm/Config/config.h" #include "llvm/Config/config.h"
#include "llvm/Demangle/Demangle.h" #include "llvm/Demangle/Demangle.h"
#include <algorithm> #include <algorithm>
#include <cstring>
using namespace llvm; using namespace llvm;
using namespace lld; using namespace lld;

View File

@ -26,6 +26,36 @@ std::vector<uint8_t> parseHex(StringRef S);
bool isValidCIdentifier(StringRef S); bool isValidCIdentifier(StringRef S);
StringRef unquote(StringRef S); StringRef unquote(StringRef S);
// This is a lazy version of StringRef. String size is computed lazily
// when it is needed. It is more efficient than StringRef to instantiate
// if you have a string whose size is unknown.
//
// ELF string tables contain a lot of null-terminated strings.
// Most of them are not necessary for the linker because they are names
// of local symbols and the linker doesn't use local symbol names for
// name resolution. So, we use this class to represent strings read
// from string tables.
class StringRefZ {
public:
  // An empty string: no data pointer and a known length of zero.
  StringRefZ() : Data(nullptr), Length(0) {}

  // A string whose length is already known by the caller.
  StringRefZ(const char *S, size_t Size) : Data(S), Length(Size) {}

  // A null-terminated string of unknown length. The all-ones length is a
  // marker meaning "not measured yet".
  /*implicit*/ StringRefZ(const char *S) : Data(S), Length((size_t)-1) {}

  // Wraps an existing StringRef, reusing its pointer and size as-is.
  /*implicit*/ StringRefZ(llvm::StringRef S)
      : Data(S.data()), Length(S.size()) {}

  // Converts to StringRef. If the length has not been measured yet, it is
  // computed with strlen() and stored, so later conversions reuse it.
  operator llvm::StringRef() const {
    if (Length != (size_t)-1)
      return llvm::StringRef(Data, Length);
    Length = strlen(Data);
    return llvm::StringRef(Data, Length);
  }

private:
  // First character of the string.
  const char *Data;

  // Length in bytes, or (size_t)-1 while it is still unmeasured. Declared
  // mutable so the const conversion operator can store the measured value.
  mutable size_t Length;
};
// This class represents a glob pattern. Supported metacharacters // This class represents a glob pattern. Supported metacharacters
// are "*", "?", "[<chars>]" and "[^<chars>]". // are "*", "?", "[<chars>]" and "[^<chars>]".
class GlobPattern { class GlobPattern {

View File

@ -236,14 +236,15 @@ SymbolTable<ELFT>::insert(StringRef Name, uint8_t Type, uint8_t Visibility,
} }
template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) { template <class ELFT> Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name) {
return addUndefined(Name, STB_GLOBAL, STV_DEFAULT, /*Type*/ 0, return addUndefined(Name, /*IsLocal=*/false, STB_GLOBAL, STV_DEFAULT,
/*Type*/ 0,
/*CanOmitFromDynSym*/ false, /*File*/ nullptr); /*CanOmitFromDynSym*/ false, /*File*/ nullptr);
} }
template <class ELFT> template <class ELFT>
Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding, Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, bool IsLocal,
uint8_t StOther, uint8_t Type, uint8_t Binding, uint8_t StOther,
bool CanOmitFromDynSym, uint8_t Type, bool CanOmitFromDynSym,
InputFile *File) { InputFile *File) {
Symbol *S; Symbol *S;
bool WasInserted; bool WasInserted;
@ -251,7 +252,7 @@ Symbol *SymbolTable<ELFT>::addUndefined(StringRef Name, uint8_t Binding,
insert(Name, Type, StOther & 3, CanOmitFromDynSym, File); insert(Name, Type, StOther & 3, CanOmitFromDynSym, File);
if (WasInserted) { if (WasInserted) {
S->Binding = Binding; S->Binding = Binding;
replaceBody<Undefined>(S, Name, StOther, Type, File); replaceBody<Undefined>(S, Name, IsLocal, StOther, Type, File);
return S; return S;
} }
if (Binding != STB_WEAK) { if (Binding != STB_WEAK) {
@ -378,8 +379,8 @@ Symbol *SymbolTable<ELFT>::addRegular(StringRef Name, uint8_t StOther,
/*CanOmitFromDynSym*/ false, File); /*CanOmitFromDynSym*/ false, File);
int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
if (Cmp > 0) if (Cmp > 0)
replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, Value, Size, replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
Section, File); Value, Size, Section, File);
else if (Cmp == 0) else if (Cmp == 0)
reportDuplicate(S->body(), Section, Value); reportDuplicate(S->body(), Section, Value);
return S; return S;
@ -432,7 +433,8 @@ Symbol *SymbolTable<ELFT>::addBitcode(StringRef Name, uint8_t Binding,
insert(Name, Type, StOther & 3, CanOmitFromDynSym, F); insert(Name, Type, StOther & 3, CanOmitFromDynSym, F);
int Cmp = compareDefinedNonCommon(S, WasInserted, Binding); int Cmp = compareDefinedNonCommon(S, WasInserted, Binding);
if (Cmp > 0) if (Cmp > 0)
replaceBody<DefinedRegular<ELFT>>(S, Name, StOther, Type, 0, 0, nullptr, F); replaceBody<DefinedRegular<ELFT>>(S, Name, /*IsLocal=*/false, StOther, Type,
0, 0, nullptr, F);
else if (Cmp == 0) else if (Cmp == 0)
reportDuplicate(S->body(), F); reportDuplicate(S->body(), F);
return S; return S;

View File

@ -55,8 +55,9 @@ public:
uint8_t Visibility = llvm::ELF::STV_HIDDEN); uint8_t Visibility = llvm::ELF::STV_HIDDEN);
Symbol *addUndefined(StringRef Name); Symbol *addUndefined(StringRef Name);
Symbol *addUndefined(StringRef Name, uint8_t Binding, uint8_t StOther, Symbol *addUndefined(StringRef Name, bool IsLocal, uint8_t Binding,
uint8_t Type, bool CanOmitFromDynSym, InputFile *File); uint8_t StOther, uint8_t Type, bool CanOmitFromDynSym,
InputFile *File);
Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type, Symbol *addRegular(StringRef Name, uint8_t StOther, uint8_t Type,
uintX_t Value, uintX_t Size, uint8_t Binding, uintX_t Value, uintX_t Size, uint8_t Binding,

View File

@ -92,22 +92,12 @@ static typename ELFT::uint getSymVA(const SymbolBody &Body,
llvm_unreachable("invalid symbol kind"); llvm_unreachable("invalid symbol kind");
} }
SymbolBody::SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type) SymbolBody::SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(true), uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(IsLocal),
IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type), IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
StOther(StOther), Name(Name) {} StOther(StOther), Name(Name) {}
SymbolBody::SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type)
: SymbolKind(K), NeedsCopyOrPltAddr(false), IsLocal(false),
IsInGlobalMipsGot(false), Is32BitMipsGot(false), Type(Type),
StOther(StOther), NameLen(Name.size()), Name(Name.data()) {}
StringRef SymbolBody::getName() const {
if (NameLen == (uint32_t)-1)
NameLen = strlen(Name);
return StringRef(Name, NameLen);
}
// Returns true if a symbol can be replaced at load-time by a symbol // Returns true if a symbol can be replaced at load-time by a symbol
// with the same name defined in other ELF executable or DSO. // with the same name defined in other ELF executable or DSO.
bool SymbolBody::isPreemptible() const { bool SymbolBody::isPreemptible() const {
@ -203,7 +193,7 @@ void SymbolBody::parseSymbolVersion() {
return; return;
// Truncate the symbol name so that it doesn't include the version string. // Truncate the symbol name so that it doesn't include the version string.
NameLen = Pos; Name = {S.data(), Pos};
// '@@' in a symbol name means the default version. // '@@' in a symbol name means the default version.
// It is usually the most recent one. // It is usually the most recent one.
@ -226,11 +216,9 @@ void SymbolBody::parseSymbolVersion() {
error("symbol " + S + " has undefined version " + Verstr); error("symbol " + S + " has undefined version " + Verstr);
} }
Defined::Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type) Defined::Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
: SymbolBody(K, Name, StOther, Type) {} uint8_t Type)
: SymbolBody(K, Name, IsLocal, StOther, Type) {}
Defined::Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type)
: SymbolBody(K, Name, StOther, Type) {}
template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const { template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
if (!Section || !isFunc()) if (!Section || !isFunc())
@ -239,27 +227,23 @@ template <class ELFT> bool DefinedRegular<ELFT>::isMipsPIC() const {
(Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC); (Section->getFile()->getObj().getHeader()->e_flags & EF_MIPS_PIC);
} }
Undefined::Undefined(StringRef Name, uint8_t StOther, uint8_t Type, Undefined::Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther,
InputFile *File) uint8_t Type, InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) { : SymbolBody(SymbolBody::UndefinedKind, Name, IsLocal, StOther, Type) {
this->File = File;
}
Undefined::Undefined(const char *Name, uint8_t StOther, uint8_t Type,
InputFile *File)
: SymbolBody(SymbolBody::UndefinedKind, Name, StOther, Type) {
this->File = File; this->File = File;
} }
template <typename ELFT> template <typename ELFT>
DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef N, uintX_t Value, DefinedSynthetic<ELFT>::DefinedSynthetic(StringRef Name, uintX_t Value,
const OutputSectionBase *Section) const OutputSectionBase *Section)
: Defined(SymbolBody::DefinedSyntheticKind, N, STV_HIDDEN, 0 /* Type */), : Defined(SymbolBody::DefinedSyntheticKind, Name, /*IsLocal=*/false,
STV_HIDDEN, 0 /* Type */),
Value(Value), Section(Section) {} Value(Value), Section(Section) {}
DefinedCommon::DefinedCommon(StringRef N, uint64_t Size, uint64_t Alignment, DefinedCommon::DefinedCommon(StringRef Name, uint64_t Size, uint64_t Alignment,
uint8_t StOther, uint8_t Type, InputFile *File) uint8_t StOther, uint8_t Type, InputFile *File)
: Defined(SymbolBody::DefinedCommonKind, N, StOther, Type), : Defined(SymbolBody::DefinedCommonKind, Name, /*IsLocal=*/false, StOther,
Type),
Alignment(Alignment), Size(Size) { Alignment(Alignment), Size(Size) {
this->File = File; this->File = File;
} }

View File

@ -16,6 +16,7 @@
#define LLD_ELF_SYMBOLS_H #define LLD_ELF_SYMBOLS_H
#include "InputSection.h" #include "InputSection.h"
#include "Strings.h"
#include "lld/Core/LLVM.h" #include "lld/Core/LLVM.h"
#include "llvm/Object/Archive.h" #include "llvm/Object/Archive.h"
@ -28,7 +29,6 @@ class ArchiveFile;
class BitcodeFile; class BitcodeFile;
class InputFile; class InputFile;
class LazyObjectFile; class LazyObjectFile;
class SymbolBody;
template <class ELFT> class ObjectFile; template <class ELFT> class ObjectFile;
template <class ELFT> class OutputSection; template <class ELFT> class OutputSection;
class OutputSectionBase; class OutputSectionBase;
@ -69,7 +69,7 @@ public:
bool isShared() const { return SymbolKind == SharedKind; } bool isShared() const { return SymbolKind == SharedKind; }
bool isLocal() const { return IsLocal; } bool isLocal() const { return IsLocal; }
bool isPreemptible() const; bool isPreemptible() const;
StringRef getName() const; StringRef getName() const { return Name; }
uint8_t getVisibility() const { return StOther & 0x3; } uint8_t getVisibility() const { return StOther & 0x3; }
void parseSymbolVersion(); void parseSymbolVersion();
@ -98,8 +98,8 @@ public:
uint32_t GlobalDynIndex = -1; uint32_t GlobalDynIndex = -1;
protected: protected:
SymbolBody(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); SymbolBody(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther,
SymbolBody(Kind K, const char *Name, uint8_t StOther, uint8_t Type); uint8_t Type);
const unsigned SymbolKind : 8; const unsigned SymbolKind : 8;
@ -136,17 +136,13 @@ public:
bool isFile() const { return Type == llvm::ELF::STT_FILE; } bool isFile() const { return Type == llvm::ELF::STT_FILE; }
protected: protected:
// Local symbols are not inserted to the symbol table, so we usually StringRefZ Name;
// don't need their names at all. We read symbol names lazily if possible.
mutable uint32_t NameLen = (uint32_t)-1;
const char *Name;
}; };
// The base class for any defined symbols. // The base class for any defined symbols.
class Defined : public SymbolBody { class Defined : public SymbolBody {
public: public:
Defined(Kind K, StringRef Name, uint8_t StOther, uint8_t Type); Defined(Kind K, StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type);
Defined(Kind K, const char *Name, uint8_t StOther, uint8_t Type);
static bool classof(const SymbolBody *S) { return S->isDefined(); } static bool classof(const SymbolBody *S) { return S->isDefined(); }
}; };
@ -175,25 +171,15 @@ template <class ELFT> class DefinedRegular : public Defined {
typedef typename ELFT::uint uintX_t; typedef typename ELFT::uint uintX_t;
public: public:
DefinedRegular(StringRef Name, uint8_t StOther, uint8_t Type, uintX_t Value, DefinedRegular(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
uintX_t Size, InputSectionBase<ELFT> *Section, InputFile *File) uintX_t Value, uintX_t Size, InputSectionBase<ELFT> *Section,
: Defined(SymbolBody::DefinedRegularKind, Name, StOther, Type), InputFile *File)
: Defined(SymbolBody::DefinedRegularKind, Name, IsLocal, StOther, Type),
Value(Value), Size(Size), Value(Value), Size(Size),
Section(Section ? Section->Repl : NullInputSection) { Section(Section ? Section->Repl : NullInputSection) {
this->File = File; this->File = File;
} }
DefinedRegular(const char *Name, const Elf_Sym &Sym,
InputSectionBase<ELFT> *Section)
: Defined(SymbolBody::DefinedRegularKind, Name, Sym.st_other,
Sym.getType()),
Value(Sym.st_value), Size(Sym.st_size),
Section(Section ? Section->Repl : NullInputSection) {
assert(isLocal());
if (Section)
this->File = Section->getFile();
}
// Return true if the symbol is a PIC function. // Return true if the symbol is a PIC function.
bool isMipsPIC() const; bool isMipsPIC() const;
@ -248,8 +234,8 @@ public:
class Undefined : public SymbolBody { class Undefined : public SymbolBody {
public: public:
Undefined(StringRef Name, uint8_t StOther, uint8_t Type, InputFile *F); Undefined(StringRefZ Name, bool IsLocal, uint8_t StOther, uint8_t Type,
Undefined(const char *Name, uint8_t StOther, uint8_t Type, InputFile *F); InputFile *F);
static bool classof(const SymbolBody *S) { static bool classof(const SymbolBody *S) {
return S->kind() == UndefinedKind; return S->kind() == UndefinedKind;
@ -270,7 +256,8 @@ public:
SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym, SharedSymbol(SharedFile<ELFT> *F, StringRef Name, const Elf_Sym &Sym,
const Elf_Verdef *Verdef) const Elf_Verdef *Verdef)
: Defined(SymbolBody::SharedKind, Name, Sym.st_other, Sym.getType()), : Defined(SymbolBody::SharedKind, Name, /*IsLocal=*/false, Sym.st_other,
Sym.getType()),
Sym(Sym), Verdef(Verdef) { Sym(Sym), Verdef(Verdef) {
// IFuncs defined in DSOs are treated as functions by the static linker. // IFuncs defined in DSOs are treated as functions by the static linker.
if (isGnuIFunc()) if (isGnuIFunc())
@ -309,7 +296,7 @@ public:
protected: protected:
Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type) Lazy(SymbolBody::Kind K, StringRef Name, uint8_t Type)
: SymbolBody(K, Name, llvm::ELF::STV_DEFAULT, Type) {} : SymbolBody(K, Name, /*IsLocal=*/false, llvm::ELF::STV_DEFAULT, Type) {}
}; };
// LazyArchive symbols represents symbols in archive files. // LazyArchive symbols represents symbols in archive files.