diff --git a/llvm/include/llvm/MC/StringTableBuilder.h b/llvm/include/llvm/MC/StringTableBuilder.h index f2b8ecd2d997..90d1bc36f166 100644 --- a/llvm/include/llvm/MC/StringTableBuilder.h +++ b/llvm/include/llvm/MC/StringTableBuilder.h @@ -15,6 +15,24 @@ #include namespace llvm { +class raw_ostream; + +class CachedHashString { + const char *P; + uint32_t Size; + uint32_t Hash; + +public: + CachedHashString(StringRef S) + : CachedHashString(S, DenseMapInfo::getHashValue(S)) {} + CachedHashString(StringRef S, uint32_t Hash) + : P(S.data()), Size(S.size()), Hash(Hash) { + assert(S.size() <= std::numeric_limits::max()); + } + + StringRef val() const { return StringRef(P, Size); } + uint32_t hash() const { return Hash; } +}; /// \brief Utility for building string tables with deduplicated suffixes. class StringTableBuilder { @@ -22,16 +40,18 @@ public: enum Kind { ELF, WinCOFF, MachO, RAW }; private: - SmallString<256> StringTable; - DenseMap, size_t> StringIndexMap; + DenseMap StringIndexMap; size_t Size = 0; Kind K; unsigned Alignment; + bool Finalized = false; void finalizeStringTable(bool Optimize); + void initSize(); public: StringTableBuilder(Kind K, unsigned Alignment = 1); + ~StringTableBuilder(); /// \brief Add a string to the builder. Returns the position of S in the /// table. The position will be changed if finalize is used. @@ -46,28 +66,18 @@ public: /// returned by add will still be valid. void finalizeInOrder(); - /// \brief Retrieve the string table data. Can only be used after the table - /// is finalized. - StringRef data() const { - assert(isFinalized()); - return StringTable; - } - /// \brief Get the offest of a string in the string table. Can only be used /// after the table is finalized. size_t getOffset(StringRef S) const; - const DenseMap, size_t> &getMap() const { - return StringIndexMap; - } - size_t getSize() const { return Size; } void clear(); + void write(raw_ostream &OS) const; + void write(uint8_t *Buf) const; + private: - bool isFinalized() const { - return !StringTable.empty(); - } + bool isFinalized() const { return Finalized; } }; } // end llvm namespace diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index dc21b48ca6f6..b115aabc8aff 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1127,7 +1127,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm, const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) { const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1]; - getStream() << StrTabBuilder.data(); + StrTabBuilder.write(getStream()); return StrtabSection; } diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index e39271949d94..ce8e216e9164 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -882,7 +882,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, sizeof(MachO::nlist_64) : sizeof(MachO::nlist)); writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, - StringTableOffset, StringTable.data().size()); + StringTableOffset, StringTable.getSize()); writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, FirstExternalSymbol, NumExternalSymbols, @@ -977,7 +977,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, writeNlist(Entry, Layout); // Write the string table. - getStream() << StringTable.data(); + StringTable.write(getStream()); } } diff --git a/llvm/lib/MC/StringTableBuilder.cpp b/llvm/lib/MC/StringTableBuilder.cpp index 7f69871fd452..3fde526e588c 100644 --- a/llvm/lib/MC/StringTableBuilder.cpp +++ b/llvm/lib/MC/StringTableBuilder.cpp @@ -11,13 +11,37 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; -StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) - : K(K), Alignment(Alignment) { +namespace llvm { +template <> struct DenseMapInfo { + static CachedHashString getEmptyKey() { + StringRef S = DenseMapInfo::getEmptyKey(); + return {S, 0}; + } + static CachedHashString getTombstoneKey() { + StringRef S = DenseMapInfo::getTombstoneKey(); + return {S, 0}; + } + static unsigned getHashValue(CachedHashString Val) { + assert(!isEqual(Val, getEmptyKey()) && "Cannot hash the empty key!"); + assert(!isEqual(Val, getTombstoneKey()) && + "Cannot hash the tombstone key!"); + return Val.hash(); + } + static bool isEqual(CachedHashString A, CachedHashString B) { + return DenseMapInfo::isEqual(A.val(), B.val()); + } +}; +} + +StringTableBuilder::~StringTableBuilder() {} + +void StringTableBuilder::initSize() { // Account for leading bytes in table so that offsets returned from add are // correct. switch (K) { @@ -26,19 +50,45 @@ StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) break; case MachO: case ELF: + // Start the table with a NUL byte. Size = 1; break; case WinCOFF: + // Make room to write the table size later. Size = 4; break; } } -typedef std::pair, size_t> StringPair; +StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment) + : K(K), Alignment(Alignment) { + initSize(); +} + +void StringTableBuilder::write(raw_ostream &OS) const { + assert(isFinalized()); + SmallString<0> Data; + Data.resize(getSize()); + write((uint8_t *)&Data[0]); + OS << Data; +} + +typedef std::pair StringPair; + +void StringTableBuilder::write(uint8_t *Buf) const { + assert(isFinalized()); + for (const StringPair &P : StringIndexMap) { + StringRef Data = P.first.val(); + memcpy(Buf + P.second, Data.data(), Data.size()); + } + if (K != WinCOFF) + return; + support::endian::write32le(Buf, Size); +} // Returns the character at Pos from end of a string. static int charTailAt(StringPair *P, size_t Pos) { - StringRef S = P->first.Val; + StringRef S = P->first.val(); if (Pos >= S.size()) return -1; return (unsigned char)S[S.size() - Pos - 1]; @@ -86,90 +136,49 @@ void StringTableBuilder::finalizeInOrder() { } void StringTableBuilder::finalizeStringTable(bool Optimize) { - std::vector Strings; - Strings.reserve(StringIndexMap.size()); - for (StringPair &P : StringIndexMap) - Strings.push_back(&P); + Finalized = true; - if (!Strings.empty()) { - // If we're optimizing, sort by name. If not, sort by previously assigned - // offset. - if (Optimize) { + if (Optimize) { + std::vector Strings; + Strings.reserve(StringIndexMap.size()); + for (StringPair &P : StringIndexMap) + Strings.push_back(&P); + + if (!Strings.empty()) { + // If we're optimizing, sort by name. If not, sort by previously assigned + // offset. multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0); - } else { - std::sort(Strings.begin(), Strings.end(), - [](const StringPair *LHS, const StringPair *RHS) { - return LHS->second < RHS->second; - }); } - } - switch (K) { - case RAW: - break; - case ELF: - case MachO: - // Start the table with a NUL byte. - StringTable += '\x00'; - break; - case WinCOFF: - // Make room to write the table size later. - StringTable.append(4, '\x00'); - break; - } + initSize(); - StringRef Previous; - for (StringPair *P : Strings) { - StringRef S = P->first.Val; - if (K == WinCOFF) - assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); - - if (Optimize && Previous.endswith(S)) { - size_t Pos = StringTable.size() - S.size() - (K != RAW); - if (!(Pos & (Alignment - 1))) { - P->second = Pos; - continue; + StringRef Previous; + for (StringPair *P : Strings) { + StringRef S = P->first.val(); + if (Previous.endswith(S)) { + size_t Pos = Size - S.size() - (K != RAW); + if (!(Pos & (Alignment - 1))) { + P->second = Pos; + continue; + } } - } - if (Optimize) { - size_t Start = alignTo(StringTable.size(), Alignment); - P->second = Start; - StringTable.append(Start - StringTable.size(), '\0'); - } else { - assert(P->second == StringTable.size() && - "different strtab offset after finalization"); - } + Size = alignTo(Size, Alignment); + P->second = Size; - StringTable += S; - if (K != RAW) - StringTable += '\x00'; - Previous = S; + Size += S.size(); + if (K != RAW) + ++Size; + Previous = S; + } } - switch (K) { - case RAW: - case ELF: - break; - case MachO: - // Pad to multiple of 4. - while (StringTable.size() % 4) - StringTable += '\x00'; - break; - case WinCOFF: - // Write the table size in the first word. - assert(StringTable.size() <= std::numeric_limits::max()); - uint32_t Size = static_cast(StringTable.size()); - support::endian::write( - StringTable.data(), Size); - break; - } - - Size = StringTable.size(); + if (K == MachO) + Size = alignTo(Size, 4); // Pad to multiple of 4. } void StringTableBuilder::clear() { - StringTable.clear(); + Finalized = false; StringIndexMap.clear(); } @@ -181,6 +190,9 @@ size_t StringTableBuilder::getOffset(StringRef S) const { } size_t StringTableBuilder::add(StringRef S) { + if (K == WinCOFF) + assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); + assert(!isFinalized()); size_t Start = alignTo(Size, Alignment); auto P = StringIndexMap.insert(std::make_pair(S, Start)); diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index f316a5af387d..819d446d216d 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -1082,7 +1082,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, if (Symbol->getIndex() != -1) WriteSymbol(*Symbol); - getStream().write(Strings.data().data(), Strings.data().size()); + Strings.write(getStream()); } MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) diff --git a/llvm/tools/yaml2obj/yaml2elf.cpp b/llvm/tools/yaml2obj/yaml2elf.cpp index c98093431a7e..8fd2bfd16726 100644 --- a/llvm/tools/yaml2obj/yaml2elf.cpp +++ b/llvm/tools/yaml2obj/yaml2elf.cpp @@ -305,9 +305,8 @@ void ELFState::initStrtabSectionHeader(Elf_Shdr &SHeader, StringRef Name, zero(SHeader); SHeader.sh_name = DotShStrtab.getOffset(Name); SHeader.sh_type = ELF::SHT_STRTAB; - CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign) - << STB.data(); - SHeader.sh_size = STB.data().size(); + STB.write(CBA.getOSAndAlignedOffset(SHeader.sh_offset, SHeader.sh_addralign)); + SHeader.sh_size = STB.getSize(); SHeader.sh_addralign = 1; } diff --git a/llvm/unittests/MC/StringTableBuilderTest.cpp b/llvm/unittests/MC/StringTableBuilderTest.cpp index f78d3588ffff..517ada5ed963 100644 --- a/llvm/unittests/MC/StringTableBuilderTest.cpp +++ b/llvm/unittests/MC/StringTableBuilderTest.cpp @@ -32,7 +32,11 @@ TEST(StringTableBuilderTest, BasicELF) { Expected += "foo"; Expected += '\x00'; - EXPECT_EQ(Expected, B.data()); + SmallString<64> Data; + raw_svector_ostream OS(Data); + B.write(OS); + + EXPECT_EQ(Expected, Data); EXPECT_EQ(1U, B.getOffset("foobar")); EXPECT_EQ(4U, B.getOffset("bar")); EXPECT_EQ(8U, B.getOffset("foo")); @@ -50,7 +54,7 @@ TEST(StringTableBuilderTest, BasicWinCOFF) { // size_field + "pygmy hippopotamus\0" + "river horse\0" uint32_t ExpectedSize = 4 + 19 + 12; - EXPECT_EQ(ExpectedSize, B.data().size()); + EXPECT_EQ(ExpectedSize, B.getSize()); std::string Expected; @@ -62,7 +66,11 @@ TEST(StringTableBuilderTest, BasicWinCOFF) { Expected += "river horse"; Expected += '\x00'; - EXPECT_EQ(Expected, B.data()); + SmallString<64> Data; + raw_svector_ostream OS(Data); + B.write(OS); + + EXPECT_EQ(Expected, Data); EXPECT_EQ(4U, B.getOffset("pygmy hippopotamus")); EXPECT_EQ(10U, B.getOffset("hippopotamus")); EXPECT_EQ(23U, B.getOffset("river horse")); @@ -85,7 +93,11 @@ TEST(StringTableBuilderTest, ELFInOrder) { Expected += "foobar"; Expected += '\x00'; - EXPECT_EQ(Expected, B.data()); + SmallString<64> Data; + raw_svector_ostream OS(Data); + B.write(OS); + + EXPECT_EQ(Expected, Data); EXPECT_EQ(1U, B.getOffset("foo")); EXPECT_EQ(5U, B.getOffset("bar")); EXPECT_EQ(9U, B.getOffset("foobar"));