forked from OSchip/llvm-project
Reapply ~"Bitcode: Collect all MDString records into a single blob"
Spiritually reapply commit r264409 (reverted in r264410), albeit with a bit of a redesign. Firstly, avoid splitting the big blob into multiple chunks of strings. r264409 imposed an arbitrary limit to avoid a massive allocation on the shared 'Record' SmallVector. The bug with that commit only reproduced when there were more than "chunk-size" strings. A test for this would have been useless long-term, since we're liable to adjust the chunk-size in the future. Thus, eliminate the motivation for chunk-ing by storing the string sizes in the blob. Here's the layout: vbr6: # of strings vbr6: offset-to-blob blob: [vbr6]: string lengths [char]: concatenated strings Secondly, make the output of llvm-bcanalyzer readable. I noticed when debugging r264409 that llvm-bcanalyzer was outputting a massive blob all in one line. Past a small number, the strings were impossible to split in my head, and the lines were way too long. This version adds support in llvm-bcanalyzer for pretty-printing. <STRINGS abbrevid=4 op0=3 op1=9/> num-strings = 3 { 'abc' 'def' 'ghi' } From the original commit: Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this should (a) slightly reduce bitcode size, since there is less record overhead, and (b) greatly improve reading speed, since blobs are super cheap to deserialize. llvm-svn: 264551
This commit is contained in:
parent
376fa26060
commit
6565a0d4b2
|
@ -209,7 +209,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
|
|||
};
|
||||
|
||||
enum MetadataCodes {
|
||||
METADATA_STRING = 1, // MDSTRING: [values]
|
||||
METADATA_STRING_OLD = 1, // MDSTRING: [values]
|
||||
METADATA_VALUE = 2, // VALUE: [type num, value num]
|
||||
METADATA_NODE = 3, // NODE: [n x md num]
|
||||
METADATA_NAME = 4, // STRING: [values]
|
||||
|
@ -243,6 +243,7 @@ enum { BITCODE_CURRENT_EPOCH = 0 };
|
|||
METADATA_MODULE = 32, // [distinct, scope, name, ...]
|
||||
METADATA_MACRO = 33, // [distinct, macinfo, line, name, value]
|
||||
METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
|
||||
METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars])
|
||||
};
|
||||
|
||||
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
|
||||
|
|
|
@ -396,6 +396,9 @@ private:
|
|||
std::error_code globalCleanup();
|
||||
std::error_code resolveGlobalAndAliasInits();
|
||||
std::error_code parseMetadata(bool ModuleLevel = false);
|
||||
std::error_code parseMetadataStrings(ArrayRef<uint64_t> Record,
|
||||
StringRef Blob,
|
||||
unsigned &NextMetadataNo);
|
||||
std::error_code parseMetadataKinds();
|
||||
std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
|
||||
std::error_code parseMetadataAttachment(Function &F);
|
||||
|
@ -1883,6 +1886,47 @@ BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) {
|
|||
|
||||
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
|
||||
|
||||
std::error_code BitcodeReader::parseMetadataStrings(ArrayRef<uint64_t> Record,
|
||||
StringRef Blob,
|
||||
unsigned &NextMetadataNo) {
|
||||
// All the MDStrings in the block are emitted together in a single
|
||||
// record. The strings are concatenated and stored in a blob along with
|
||||
// their sizes.
|
||||
if (Record.size() != 2)
|
||||
return error("Invalid record: metadata strings layout");
|
||||
|
||||
unsigned NumStrings = Record[0];
|
||||
unsigned StringsOffset = Record[1];
|
||||
if (!NumStrings)
|
||||
return error("Invalid record: metadata strings with no strings");
|
||||
if (StringsOffset >= Blob.size())
|
||||
return error("Invalid record: metadata strings corrupt offset");
|
||||
|
||||
StringRef Lengths = Blob.slice(0, StringsOffset);
|
||||
SimpleBitstreamCursor R(*StreamFile);
|
||||
R.jumpToPointer(Lengths.begin());
|
||||
|
||||
// Ensure that Blob doesn't get invalidated, even if this is reading from
|
||||
// a StreamingMemoryObject with corrupt data.
|
||||
R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
|
||||
|
||||
StringRef Strings = Blob.drop_front(StringsOffset);
|
||||
do {
|
||||
if (R.AtEndOfStream())
|
||||
return error("Invalid record: metadata strings bad length");
|
||||
|
||||
unsigned Size = R.ReadVBR(6);
|
||||
if (Strings.size() < Size)
|
||||
return error("Invalid record: metadata strings truncated chars");
|
||||
|
||||
MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
|
||||
NextMetadataNo++);
|
||||
Strings = Strings.drop_front(Size);
|
||||
} while (--NumStrings);
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
|
||||
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
|
||||
/// module level metadata.
|
||||
std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
|
||||
|
@ -1929,7 +1973,8 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
|
|||
|
||||
// Read a record.
|
||||
Record.clear();
|
||||
unsigned Code = Stream.readRecord(Entry.ID, Record);
|
||||
StringRef Blob;
|
||||
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
|
||||
bool IsDistinct = false;
|
||||
switch (Code) {
|
||||
default: // Default behavior: ignore.
|
||||
|
@ -2363,7 +2408,7 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
|
|||
NextMetadataNo++);
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_STRING: {
|
||||
case bitc::METADATA_STRING_OLD: {
|
||||
std::string String(Record.begin(), Record.end());
|
||||
|
||||
// Test for upgrading !llvm.loop.
|
||||
|
@ -2373,6 +2418,11 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
|
|||
MetadataList.assignValue(MD, NextMetadataNo++);
|
||||
break;
|
||||
}
|
||||
case bitc::METADATA_STRINGS:
|
||||
if (std::error_code EC =
|
||||
parseMetadataStrings(Record, Blob, NextMetadataNo))
|
||||
return EC;
|
||||
break;
|
||||
case bitc::METADATA_KIND: {
|
||||
// Support older bitcode files that had METADATA_KIND records in a
|
||||
// block with METADATA_BLOCK_ID.
|
||||
|
|
|
@ -1347,31 +1347,65 @@ static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE,
|
|||
}
|
||||
}
|
||||
|
||||
static unsigned createMetadataStringsAbbrev(BitstreamWriter &Stream) {
|
||||
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
|
||||
return Stream.EmitAbbrev(Abbv);
|
||||
}
|
||||
|
||||
/// Write out a record for MDString.
|
||||
///
|
||||
/// All the metadata strings in a metadata block are emitted in a single
|
||||
/// record. The sizes and strings themselves are shoved into a blob.
|
||||
static void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
|
||||
BitstreamWriter &Stream,
|
||||
SmallVectorImpl<uint64_t> &Record) {
|
||||
if (Strings.empty())
|
||||
return;
|
||||
|
||||
// Start the record with the number of strings.
|
||||
Record.push_back(bitc::METADATA_STRINGS);
|
||||
Record.push_back(Strings.size());
|
||||
|
||||
// Emit the sizes of the strings in the blob.
|
||||
SmallString<256> Blob;
|
||||
{
|
||||
BitstreamWriter W(Blob);
|
||||
for (const Metadata *MD : Strings)
|
||||
W.EmitVBR(cast<MDString>(MD)->getLength(), 6);
|
||||
W.FlushToWord();
|
||||
}
|
||||
|
||||
// Add the offset to the strings to the record.
|
||||
Record.push_back(Blob.size());
|
||||
|
||||
// Add the strings to the blob.
|
||||
for (const Metadata *MD : Strings)
|
||||
Blob.append(cast<MDString>(MD)->getString());
|
||||
|
||||
// Emit the final record.
|
||||
Stream.EmitRecordWithBlob(createMetadataStringsAbbrev(Stream), Record, Blob);
|
||||
Record.clear();
|
||||
}
|
||||
|
||||
static void WriteModuleMetadata(const Module &M,
|
||||
const ValueEnumerator &VE,
|
||||
BitstreamWriter &Stream) {
|
||||
const auto &MDs = VE.getMDs();
|
||||
if (MDs.empty() && M.named_metadata_empty())
|
||||
if (VE.getMDs().empty() && M.named_metadata_empty())
|
||||
return;
|
||||
|
||||
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
|
||||
|
||||
unsigned MDSAbbrev = 0;
|
||||
if (VE.hasMDString()) {
|
||||
// Abbrev for METADATA_STRING.
|
||||
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
|
||||
Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
|
||||
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
|
||||
MDSAbbrev = Stream.EmitAbbrev(Abbv);
|
||||
}
|
||||
|
||||
// Initialize MDNode abbreviations.
|
||||
#define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
|
||||
#include "llvm/IR/Metadata.def"
|
||||
|
||||
SmallVector<uint64_t, 64> Record;
|
||||
for (const Metadata *MD : MDs) {
|
||||
writeMetadataStrings(VE.getMDStrings(), Stream, Record);
|
||||
for (const Metadata *MD : VE.getNonMDStrings()) {
|
||||
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
|
||||
assert(N->isResolved() && "Expected forward references to be resolved");
|
||||
|
||||
|
@ -1385,17 +1419,7 @@ static void WriteModuleMetadata(const Module &M,
|
|||
#include "llvm/IR/Metadata.def"
|
||||
}
|
||||
}
|
||||
if (const auto *MDC = dyn_cast<ConstantAsMetadata>(MD)) {
|
||||
WriteValueAsMetadata(MDC, VE, Stream, Record);
|
||||
continue;
|
||||
}
|
||||
const MDString *MDS = cast<MDString>(MD);
|
||||
// Code: [strchar x N]
|
||||
Record.append(MDS->bytes_begin(), MDS->bytes_end());
|
||||
|
||||
// Emit the finished record.
|
||||
Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
|
||||
Record.clear();
|
||||
WriteValueAsMetadata(cast<ConstantAsMetadata>(MD), VE, Stream, Record);
|
||||
}
|
||||
|
||||
writeNamedMetadata(M, VE, Stream, Record);
|
||||
|
|
|
@ -280,8 +280,7 @@ static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
|
|||
|
||||
ValueEnumerator::ValueEnumerator(const Module &M,
|
||||
bool ShouldPreserveUseListOrder)
|
||||
: HasMDString(false),
|
||||
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
|
||||
: ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
|
||||
if (ShouldPreserveUseListOrder)
|
||||
UseListOrders = predictUseListOrder(M);
|
||||
|
||||
|
@ -375,6 +374,9 @@ ValueEnumerator::ValueEnumerator(const Module &M,
|
|||
|
||||
// Optimize constant ordering.
|
||||
OptimizeConstants(FirstConstant, Values.size());
|
||||
|
||||
// Organize metadata ordering.
|
||||
organizeMetadata();
|
||||
}
|
||||
|
||||
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
|
||||
|
@ -530,8 +532,8 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) {
|
|||
EnumerateMDNodeOperands(N);
|
||||
else if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
|
||||
EnumerateValue(C->getValue());
|
||||
|
||||
HasMDString |= isa<MDString>(MD);
|
||||
else
|
||||
++NumMDStrings;
|
||||
|
||||
// Replace the dummy ID inserted above with the correct one. MetadataMap may
|
||||
// have changed by inserting operands, so we need a fresh lookup here.
|
||||
|
@ -557,6 +559,19 @@ void ValueEnumerator::EnumerateFunctionLocalMetadata(
|
|||
FunctionLocalMDs.push_back(Local);
|
||||
}
|
||||
|
||||
void ValueEnumerator::organizeMetadata() {
|
||||
if (!NumMDStrings)
|
||||
return;
|
||||
|
||||
// Put the strings first.
|
||||
std::stable_partition(MDs.begin(), MDs.end(),
|
||||
[](const Metadata *MD) { return isa<MDString>(MD); });
|
||||
|
||||
// Renumber.
|
||||
for (unsigned I = 0, E = MDs.size(); I != E; ++I)
|
||||
MetadataMap[MDs[I]] = I + 1;
|
||||
}
|
||||
|
||||
void ValueEnumerator::EnumerateValue(const Value *V) {
|
||||
assert(!V->getType()->isVoidTy() && "Can't insert void values!");
|
||||
assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
|
||||
|
|
|
@ -66,7 +66,7 @@ private:
|
|||
SmallVector<const LocalAsMetadata *, 8> FunctionLocalMDs;
|
||||
typedef DenseMap<const Metadata *, unsigned> MetadataMapType;
|
||||
MetadataMapType MetadataMap;
|
||||
bool HasMDString;
|
||||
unsigned NumMDStrings = 0;
|
||||
bool ShouldPreserveUseListOrder;
|
||||
|
||||
typedef DenseMap<AttributeSet, unsigned> AttributeGroupMapType;
|
||||
|
@ -121,8 +121,6 @@ public:
|
|||
}
|
||||
unsigned numMDs() const { return MDs.size(); }
|
||||
|
||||
bool hasMDString() const { return HasMDString; }
|
||||
|
||||
bool shouldPreserveUseListOrder() const { return ShouldPreserveUseListOrder; }
|
||||
|
||||
unsigned getTypeID(Type *T) const {
|
||||
|
@ -157,9 +155,16 @@ public:
|
|||
|
||||
const ValueList &getValues() const { return Values; }
|
||||
const std::vector<const Metadata *> &getMDs() const { return MDs; }
|
||||
ArrayRef<const Metadata *> getMDStrings() const {
|
||||
return makeArrayRef(MDs).slice(0, NumMDStrings);
|
||||
}
|
||||
ArrayRef<const Metadata *> getNonMDStrings() const {
|
||||
return makeArrayRef(MDs).slice(NumMDStrings);
|
||||
}
|
||||
const SmallVectorImpl<const LocalAsMetadata *> &getFunctionLocalMDs() const {
|
||||
return FunctionLocalMDs;
|
||||
}
|
||||
|
||||
const TypeList &getTypes() const { return Types; }
|
||||
const std::vector<const BasicBlock*> &getBasicBlocks() const {
|
||||
return BasicBlocks;
|
||||
|
@ -189,6 +194,10 @@ public:
|
|||
private:
|
||||
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
|
||||
|
||||
// Reorder the reachable metadata. This is not just an optimization, but is
|
||||
// mandatory for emitting MDString correctly.
|
||||
void organizeMetadata();
|
||||
|
||||
void EnumerateMDNodeOperands(const MDNode *N);
|
||||
void EnumerateMetadata(const Metadata *MD);
|
||||
void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local);
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
|
||||
|
||||
!named = !{!0}
|
||||
|
||||
; CHECK: <METADATA_BLOCK
|
||||
; CHECK-NEXT: <STRINGS
|
||||
; CHECK-SAME: /> num-strings = 3 {
|
||||
; CHECK-NEXT: 'a'
|
||||
; CHECK-NEXT: 'b'
|
||||
; CHECK-NEXT: 'c'
|
||||
; CHECK-NEXT: }
|
||||
!0 = !{!"a", !"b", !"c"}
|
|
@ -312,7 +312,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
|
|||
case bitc::METADATA_BLOCK_ID:
|
||||
switch(CodeID) {
|
||||
default:return nullptr;
|
||||
STRINGIFY_CODE(METADATA, STRING)
|
||||
STRINGIFY_CODE(METADATA, STRING_OLD)
|
||||
STRINGIFY_CODE(METADATA, STRINGS)
|
||||
STRINGIFY_CODE(METADATA, NAME)
|
||||
STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK
|
||||
STRINGIFY_CODE(METADATA, NODE)
|
||||
|
@ -404,6 +405,57 @@ static bool Error(const Twine &Err) {
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
|
||||
ArrayRef<uint64_t> Record,
|
||||
StringRef Blob) {
|
||||
if (Blob.empty())
|
||||
return true;
|
||||
|
||||
if (Record.size() != 2)
|
||||
return true;
|
||||
|
||||
unsigned NumStrings = Record[0];
|
||||
unsigned StringsOffset = Record[1];
|
||||
outs() << " num-strings = " << NumStrings << " {\n";
|
||||
|
||||
StringRef Lengths = Blob.slice(0, StringsOffset);
|
||||
SimpleBitstreamCursor R(Reader);
|
||||
R.jumpToPointer(Lengths.begin());
|
||||
|
||||
// Ensure that Blob doesn't get invalidated, even if this is reading from a
|
||||
// StreamingMemoryObject with corrupt data.
|
||||
R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
|
||||
|
||||
StringRef Strings = Blob.drop_front(StringsOffset);
|
||||
do {
|
||||
if (R.AtEndOfStream())
|
||||
return Error("bad length");
|
||||
|
||||
unsigned Size = R.ReadVBR(6);
|
||||
if (Strings.size() < Size)
|
||||
return Error("truncated chars");
|
||||
|
||||
outs() << Indent << " '";
|
||||
outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
|
||||
outs() << "'\n";
|
||||
Strings = Strings.drop_front(Size);
|
||||
} while (--NumStrings);
|
||||
|
||||
outs() << Indent << " }";
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool decodeBlob(unsigned Code, unsigned BlockID, BitstreamReader &Reader,
|
||||
StringRef Indent, ArrayRef<uint64_t> Record,
|
||||
StringRef Blob) {
|
||||
if (BlockID != bitc::METADATA_BLOCK_ID)
|
||||
return true;
|
||||
if (Code != bitc::METADATA_STRINGS)
|
||||
return true;
|
||||
|
||||
return decodeMetadataStringsBlob(Reader, Indent, Record, Blob);
|
||||
}
|
||||
|
||||
/// ParseBlock - Read a block, updating statistics, etc.
|
||||
static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
|
||||
unsigned IndentLevel, CurStreamTypeType CurStreamType) {
|
||||
|
@ -557,7 +609,8 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
|
|||
}
|
||||
}
|
||||
|
||||
if (Blob.data()) {
|
||||
if (Blob.data() && decodeBlob(Code, BlockID, *Stream.getBitStreamReader(),
|
||||
Indent, Record, Blob)) {
|
||||
outs() << " blob data = ";
|
||||
if (ShowBinaryBlobs) {
|
||||
outs() << "'";
|
||||
|
|
Loading…
Reference in New Issue