diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 26f0ac658849..eb448c03767b 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1447,8 +1447,7 @@ static void writeFunctionMetadata(const Function &F, const ValueEnumerator &VE, Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); SmallVector Record; - assert(VE.getMDStrings().empty() && - "Unexpected strings at the function-level"); + writeMetadataStrings(VE.getMDStrings(), Stream, Record); writeMetadataRecords(VE.getNonMDStrings(), VE, Stream, Record); Stream.ExitBlock(); } diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 5c5f05ca8f96..91b523f05bce 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -336,7 +336,7 @@ ValueEnumerator::ValueEnumerator(const Module &M, // Enumerate metadata attached to this function. F.getAllMetadata(MDs); for (const auto &I : MDs) - EnumerateMetadata(I.second); + EnumerateMetadata(&F, I.second); for (const BasicBlock &BB : F) for (const Instruction &I : BB) { @@ -351,7 +351,7 @@ ValueEnumerator::ValueEnumerator(const Module &M, if (isa(MD->getMetadata())) continue; - EnumerateMetadata(MD->getMetadata()); + EnumerateMetadata(&F, MD->getMetadata()); } EnumerateType(I.getType()); if (const CallInst *CI = dyn_cast(&I)) @@ -363,12 +363,12 @@ ValueEnumerator::ValueEnumerator(const Module &M, MDs.clear(); I.getAllMetadataOtherThanDebugLoc(MDs); for (unsigned i = 0, e = MDs.size(); i != e; ++i) - EnumerateMetadata(MDs[i].second); + EnumerateMetadata(&F, MDs[i].second); // Don't enumerate the location directly -- it has a special record // type -- but enumerate its operands. if (DILocation *L = I.getDebugLoc()) - EnumerateMDNodeOperands(L); + EnumerateMDNodeOperands(&F, L); } } @@ -447,8 +447,10 @@ void ValueEnumerator::print(raw_ostream &OS, const MetadataMapType &Map, OS << "Size: " << Map.size() << "\n"; for (auto I = Map.begin(), E = Map.end(); I != E; ++I) { const Metadata *MD = I->first; - OS << "Metadata: slot = " << I->second << "\n"; + OS << "Metadata: slot = " << I->second.ID << "\n"; + OS << "Metadata: function = " << I->second.F << "\n"; MD->print(OS); + OS << "\n"; } } @@ -500,22 +502,87 @@ void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) - EnumerateMetadata(MD->getOperand(i)); + EnumerateMetadata(nullptr, MD->getOperand(i)); +} + +unsigned ValueEnumerator::getMetadataFunctionID(const Function *F) const { + return F ? getValueID(F) + 1 : 0; +} + +void ValueEnumerator::EnumerateMDNodeOperands(const Function *F, + const MDNode *N) { + EnumerateMDNodeOperands(getMetadataFunctionID(F), N); +} + +void ValueEnumerator::EnumerateMetadata(const Function *F, const Metadata *MD) { + EnumerateMetadata(getMetadataFunctionID(F), MD); +} + +void ValueEnumerator::EnumerateFunctionLocalMetadata( + const Function &F, const LocalAsMetadata *Local) { + EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F), Local); } /// EnumerateMDNodeOperands - Enumerate all non-function-local values /// and types referenced by the given MDNode. -void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) { +void ValueEnumerator::EnumerateMDNodeOperands(unsigned F, const MDNode *N) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { Metadata *MD = N->getOperand(i); if (!MD) continue; assert(!isa(MD) && "MDNodes cannot be function-local"); - EnumerateMetadata(MD); + EnumerateMetadata(F, MD); } } -void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { +bool ValueEnumerator::insertMetadata(unsigned F, const Metadata *MD) { + auto Insertion = MetadataMap.insert(std::make_pair(MD, MDIndex(F))); + if (Insertion.second) + return true; + + // Check whether F is a different function. + MDIndex &Entry = Insertion.first->second; + if (!Entry.hasDifferentFunction(F)) + return false; + + // Since MD was tagged from a different function entry point then it must + // already have an ID. + assert(Entry.ID && "Expected metadata to already be indexed"); + Entry.F = 0; + + // Drop the function from transitive operands. + if (auto *N = dyn_cast(MD)) + dropFunctionFromOps(*N); + + return false; +} + +void ValueEnumerator::dropFunctionFromOps(const MDNode &N) { + SmallVector WorkList; + WorkList.push_back(&N); + while (!WorkList.empty()) { + for (const Metadata *Op : WorkList.pop_back_val()->operands()) { + if (!Op) + continue; + + // All transitive operands of N should already have IDs. This should be + // a second traversal. + auto &Entry = MetadataMap[Op]; + assert(Entry.ID && "Expected metadata to already be indexed"); + + // Nothing to do if this operand isn't tagged. + if (!Entry.F) + continue; + + // Drop the tag, and if it's a node (with potential operands), queue it. + Entry.F = 0; + if (auto *OpN = dyn_cast(Op)) + WorkList.push_back(OpN); + } + } +} + +void ValueEnumerator::EnumerateMetadata(unsigned F, const Metadata *MD) { assert( (isa(MD) || isa(MD) || isa(MD)) && "Invalid metadata kind"); @@ -524,49 +591,120 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph. // // Return early if there's already an ID. - if (!MetadataMap.insert(std::make_pair(MD, 0)).second) + if (!insertMetadata(F, MD)) return; // Visit operands first to minimize RAUW. if (auto *N = dyn_cast(MD)) - EnumerateMDNodeOperands(N); + EnumerateMDNodeOperands(F, N); else if (auto *C = dyn_cast(MD)) EnumerateValue(C->getValue()); - else - ++NumMDStrings; - // Replace the dummy ID inserted above with the correct one. MetadataMap may - // have changed by inserting operands, so we need a fresh lookup here. + // Save the metadata. MDs.push_back(MD); - MetadataMap[MD] = MDs.size(); + MetadataMap[MD].ID = MDs.size(); } /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata /// information reachable from the metadata. void ValueEnumerator::EnumerateFunctionLocalMetadata( - const LocalAsMetadata *Local) { + unsigned F, const LocalAsMetadata *Local) { + assert(F && "Expected a function"); + // Check to see if it's already in! - unsigned &MetadataID = MetadataMap[Local]; - if (MetadataID) + MDIndex &Index = MetadataMap[Local]; + if (Index.ID) { + assert(Index.F == F && "Expected the same function"); return; + } MDs.push_back(Local); - MetadataID = MDs.size(); + Index.F = F; + Index.ID = MDs.size(); EnumerateValue(Local->getValue()); } void ValueEnumerator::organizeMetadata() { - if (!NumMDStrings) + assert(MetadataMap.size() == MDs.size() && + "Metadata map and vector out of sync"); + + if (MDs.empty()) return; - // Put the strings first. - std::stable_partition(MDs.begin(), MDs.end(), - [](const Metadata *MD) { return isa(MD); }); + // Copy out the index information from MetadataMap in order to choose a new + // order. + SmallVector Order; + Order.reserve(MetadataMap.size()); + for (const Metadata *MD : MDs) + Order.push_back(MetadataMap.lookup(MD)); - // Renumber. - for (unsigned I = 0, E = MDs.size(); I != E; ++I) - MetadataMap[MDs[I]] = I + 1; + // Partition: + // - by function, then + // - by isa + // and then sort by the original/current ID. Since the IDs are guaranteed to + // be unique, the result of std::sort will be deterministic. There's no need + // for std::stable_sort. + std::sort(Order.begin(), Order.end(), [this](MDIndex LHS, MDIndex RHS) { + return std::make_tuple(LHS.F, !isa(LHS.get(MDs)), LHS.ID) < + std::make_tuple(RHS.F, !isa(RHS.get(MDs)), RHS.ID); + }); + + // Return early if nothing is moving to functions and there are no strings. + if (!Order.back().F && !isa(Order.front().get(MDs))) + return; + + // Rebuild MDs, index the metadata ranges for each function in FunctionMDs, + // and fix up MetadataMap. + std::vector OldMDs = std::move(MDs); + MDs.reserve(OldMDs.size()); + for (unsigned I = 0, E = Order.size(); I != E && !Order[I].F; ++I) { + auto *MD = Order[I].get(OldMDs); + MDs.push_back(MD); + MetadataMap[MD].ID = I + 1; + if (isa(MD)) + ++NumMDStrings; + } + + // Return early if there's nothing for the functions. + if (MDs.size() == Order.size()) + return; + + // Build the function metadata ranges. + MDRange R; + FunctionMDs.reserve(OldMDs.size()); + unsigned PrevF = 0; + for (unsigned I = MDs.size(), E = Order.size(), ID = MDs.size(); I != E; + ++I) { + unsigned F = Order[I].F; + if (!PrevF) { + PrevF = F; + } else if (PrevF != F) { + R.Last = FunctionMDs.size(); + std::swap(R, FunctionMDInfo[PrevF]); + R.First = FunctionMDs.size(); + + ID = MDs.size(); + PrevF = F; + } + + auto *MD = Order[I].get(OldMDs); + FunctionMDs.push_back(MD); + MetadataMap[MD].ID = ++ID; + if (isa(MD)) + ++R.NumStrings; + } + R.Last = FunctionMDs.size(); + FunctionMDInfo[PrevF] = R; +} + +void ValueEnumerator::incorporateFunctionMetadata(const Function &F) { + NumModuleMDs = MDs.size(); + + auto R = FunctionMDInfo.lookup(getValueID(&F) + 1); + NumMDStrings = R.NumStrings; + MDs.insert(MDs.end(), FunctionMDs.begin() + R.First, + FunctionMDs.begin() + R.Last); } void ValueEnumerator::EnumerateValue(const Value *V) { @@ -708,8 +846,10 @@ void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) { void ValueEnumerator::incorporateFunction(const Function &F) { InstructionCount = 0; NumModuleValues = Values.size(); - NumModuleMDs = MDs.size(); - NumMDStrings = 0; + + // Add global metadata to the function block. This doesn't include + // LocalAsMetadata. + incorporateFunctionMetadata(F); // Adding function arguments to the value table. for (const auto &I : F.args()) @@ -755,7 +895,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) { // Add all of the function-local metadata. for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) - EnumerateFunctionLocalMetadata(FnLocalMDVector[i]); + EnumerateFunctionLocalMetadata(F, FnLocalMDVector[i]); } void ValueEnumerator::purgeFunction() { @@ -770,6 +910,7 @@ void ValueEnumerator::purgeFunction() { Values.resize(NumModuleValues); MDs.resize(NumModuleMDs); BasicBlocks.clear(); + NumMDStrings = 0; } static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/llvm/lib/Bitcode/Writer/ValueEnumerator.h index 4c5c03d13e2e..ade52a90060d 100644 --- a/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -63,8 +63,42 @@ private: ComdatSetType Comdats; std::vector MDs; - typedef DenseMap MetadataMapType; + std::vector FunctionMDs; + + /// Index of information about a piece of metadata. + struct MDIndex { + unsigned F = 0; ///< The ID of the function for this metadata, if any. + unsigned ID = 0; ///< The implicit ID of this metadata in bitcode. + + MDIndex() = default; + explicit MDIndex(unsigned F) : F(F) {} + + /// Check if this has a function tag, and it's different from NewF. + bool hasDifferentFunction(unsigned NewF) const { return F && F != NewF; } + + /// Fetch the MD this references out of the given metadata array. + const Metadata *get(ArrayRef MDs) const { + assert(ID && "Expected non-zero ID"); + assert(ID <= MDs.size() && "Expected valid ID"); + return MDs[ID - 1]; + } + }; + typedef DenseMap MetadataMapType; MetadataMapType MetadataMap; + + /// Range of metadata IDs, as a half-open range. + struct MDRange { + unsigned First = 0; + unsigned Last = 0; + + /// Number of strings in the prefix of the metadata range. + unsigned NumStrings = 0; + + MDRange() = default; + explicit MDRange(unsigned First) : First(First) {} + }; + SmallDenseMap FunctionMDInfo; + bool ShouldPreserveUseListOrder; typedef DenseMap AttributeGroupMapType; @@ -116,7 +150,7 @@ public: return ID - 1; } unsigned getMetadataOrNullID(const Metadata *MD) const { - return MetadataMap.lookup(MD); + return MetadataMap.lookup(MD).ID; } unsigned numMDs() const { return MDs.size(); } @@ -196,13 +230,31 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); - // Reorder the reachable metadata. This is not just an optimization, but is - // mandatory for emitting MDString correctly. + /// Reorder the reachable metadata. + /// + /// This is not just an optimization, but is mandatory for emitting MDString + /// correctly. void organizeMetadata(); - void EnumerateMDNodeOperands(const MDNode *N); - void EnumerateMetadata(const Metadata *MD); - void EnumerateFunctionLocalMetadata(const LocalAsMetadata *Local); + /// Drop the function tag from the transitive operands of the given node. + void dropFunctionFromOps(const MDNode &N); + + /// Incorporate the function metadata. + /// + /// This should be called before enumerating LocalAsMetadata for the + /// function. + void incorporateFunctionMetadata(const Function &F); + + bool insertMetadata(unsigned F, const Metadata *MD); + + unsigned getMetadataFunctionID(const Function *F) const; + void EnumerateMDNodeOperands(const Function *F, const MDNode *N); + void EnumerateMDNodeOperands(unsigned F, const MDNode *N); + void EnumerateMetadata(const Function *F, const Metadata *MD); + void EnumerateMetadata(unsigned F, const Metadata *MD); + void EnumerateFunctionLocalMetadata(const Function &F, + const LocalAsMetadata *Local); + void EnumerateFunctionLocalMetadata(unsigned F, const LocalAsMetadata *Local); void EnumerateNamedMDNode(const NamedMDNode *NMD); void EnumerateValue(const Value *V); void EnumerateType(Type *T); diff --git a/llvm/test/Bitcode/metadata-function-blocks.ll b/llvm/test/Bitcode/metadata-function-blocks.ll new file mode 100644 index 000000000000..f3e83c5074d1 --- /dev/null +++ b/llvm/test/Bitcode/metadata-function-blocks.ll @@ -0,0 +1,75 @@ +; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s +; Test that metadata only used by a single function is serialized in that +; function instead of in the global pool. +; +; In order to make the bitcode records easy to follow, nodes in this testcase +; are named after the ids they are given in the bitcode. Nodes local to a +; function have offsets of 100 or 200 (depending on the function) so that they +; remain unique within this textual IR. + +; Check for strings in the global pool. +; CHECK: num-strings = 3 { +; CHECK-NEXT: 'named' +; CHECK-NEXT: 'named and foo' +; CHECK-NEXT: 'foo and bar' +; CHECK-NEXT: } + +; Each node gets a new number. Bottom-up traversal of nodes. +!named = !{!6} + +; CHECK-NEXT: +!4 = !{!"named"} + +; CHECK-NEXT: +!5 = !{!"named and foo"} + +; CHECK-NEXT: +!6 = !{!"named", !4, !5} + +; CHECK-NEXT: +!7 = !{!"foo and bar"} + +; CHECK-NOT: num-strings = 1 { +; CHECK-NEXT: 'foo' +; CHECK-NEXT: } + +; Function-local nodes start at 9 (strings at 8). +; CHECK-NEXT: +!109 = !{!"foo"} + +; CHECK-NEXT: +!110 = !{!"foo", !"foo and bar", !109, !7, !5} + +; CHECK-NEXT: num-strings = 1 { +; CHECK-NEXT: 'bar' +; CHECK-NEXT: } + +; Function-local nodes start at 9 (strings at 8). +; CHECK-NEXT: +!209 = !{!"bar"} + +; CHECK-NEXT: +!210 = !{!"bar", !"foo and bar", !209, !7} + +; CHECK-NEXT: