diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 8a74a5553083..a5b1fac933ac 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -410,6 +410,28 @@ struct IRInstructionMapper { /// The \ref isSimilar function compares each IRInstructionData against one /// another and if we have the same sequences of IRInstructionData that would /// create the same hash, we have similar IRSimilarityCandidates. +/// +/// We can also compare the structure of IRSimilarityCandidates. If we can +/// create a mapping of registers in the region contained by one +/// IRSimilarityCandidate to the region contained by different +/// IRSimilarityCandidate, they can be considered structurally similar. +/// +/// IRSimilarityCandidate1: IRSimilarityCandidate2: +/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e +/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f +/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 +/// +/// Can have the following mapping from candidate to candidate of: +/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4 +/// and can be considered similar. +/// +/// IRSimilarityCandidate1: IRSimilarityCandidate2: +/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4 +/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f +/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4 +/// +/// We cannot create the same mapping since the use of c4 is not used in the +/// same way as %b or c2. class IRSimilarityCandidate { private: /// The start index of this IRSimilarityCandidate in the instruction list. @@ -448,6 +470,37 @@ public: /// IRInstructionData in \p B. static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B); + + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \returns True when every IRInstructionData in \p A is structurally similar + /// to \p B. + static bool compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + struct OperandMapping { + /// The IRSimilarityCandidate that holds the instruction the OperVals were + /// pulled from. + const IRSimilarityCandidate &IRSC; + + /// The operand values to be analyzed. + ArrayRef &OperVals; + + /// The current mapping of global value numbers from one IRSimilarityCandidate + /// to another IRSimilarityCandidate. + DenseMap> &ValueNumberMapping; + }; + + /// Compare the operands in \p A and \p B and check that the current mapping + /// of global value numbers from \p A to \p B and \p B to \A is consistent. + /// + /// \param A - The first IRInstructionCandidate, operand values, and current + /// operand mappings to compare. + /// \param B - The second IRInstructionCandidate, operand values, and current + /// operand mappings to compare. + /// \returns true if the IRSimilarityCandidates operands are compatible. + static bool compareOperandMapping(OperandMapping A, OperandMapping B); + /// Compare the start and end indices of the two IRSimilarityCandidates for /// whether they overlap. If the start instruction of one /// IRSimilarityCandidate is less than the end instruction of the other, and @@ -521,6 +574,7 @@ public: iterator begin() const { return iterator(front()); } iterator end() const { return std::next(iterator(back())); } }; + } // end namespace IRSimilarity } // end namespace llvm diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index de9869211de4..baec0b2924ba 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -239,6 +239,160 @@ bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, }); } +/// Determine if operand number \p TargetArgVal is in the current mapping set +/// for operand number \p SourceArgVal. +/// +/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global +/// value numbers from source IRSimilarityCandidate to target +/// IRSimilarityCandidate. +/// \param [in] SourceArgVal The global value number for an operand in the +/// in the original candidate. +/// \param [in] TargetArgVal The global value number for the corresponding +/// operand in the other candidate. +/// \returns True if there exists a mapping and false if not. +bool checkNumberingAndReplace( + DenseMap> &CurrentSrcTgtNumberMapping, + unsigned SourceArgVal, unsigned TargetArgVal) { + // We are given two unsigned integers representing the global values of + // the operands in different IRSimilarityCandidates and a current mapping + // between the two. + // + // Source Operand GVN: 1 + // Target Operand GVN: 2 + // CurrentMapping: {1: {1, 2}} + // + // Since we have mapping, and the target operand is contained in the set, we + // update it to: + // CurrentMapping: {1: {2}} + // and can return true. But, if the mapping was + // CurrentMapping: {1: {3}} + // we would return false. + + bool WasInserted; + DenseMap>::iterator Val; + + std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( + std::make_pair(SourceArgVal, DenseSet({TargetArgVal}))); + + // If we created a new mapping, then we are done. + if (WasInserted) + return true; + + // If there is more than one option in the mapping set, and the target value + // is included in the mapping set replace that set with one that only includes + // the target value, as it is the only valid mapping via the non commutative + // instruction. + + DenseSet &TargetSet = Val->second; + if (TargetSet.size() > 1 && TargetSet.find(TargetArgVal) != TargetSet.end()) { + TargetSet.clear(); + TargetSet.insert(TargetArgVal); + return true; + } + + // Return true if we can find the value in the set. + return TargetSet.find(TargetArgVal) != TargetSet.end(); +} + +bool IRSimilarityCandidate::compareOperandMapping(OperandMapping A, + OperandMapping B) { + // Iterators to keep track of where we are in the operands for each + // Instruction. + ArrayRef::iterator VItA = A.OperVals.begin(); + ArrayRef::iterator VItB = B.OperVals.begin(); + unsigned OperandLength = A.OperVals.size(); + + // For each operand, get the value numbering and ensure it is consistent. + for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { + unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; + unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; + + // Attempt to add a set with only the target value. If there is no mapping + // we can create it here. + // + // For an instruction like a subtraction: + // IRSimilarityCandidateA: IRSimilarityCandidateB: + // %resultA = sub %a, %b %resultB = sub %d, %e + // + // We map %a -> %d and %b -> %e. + // + // And check to see whether their mapping is consistent in + // checkNumberingAndReplace. + + if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) + return false; + + if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) + return false; + } + return true; +} + +bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + if (A.getLength() != B.getLength()) + return false; + + if (A.ValueToNumber.size() != B.ValueToNumber.size()) + return false; + + iterator ItA = A.begin(); + iterator ItB = B.begin(); + + // These sets create a create a mapping between the values in one candidate + // to values in the other candidate. If we create a set with one element, + // and that same element maps to the original element in the candidate + // we have a good mapping. + DenseMap> ValueNumberMappingA; + DenseMap> ValueNumberMappingB; + DenseMap>::iterator ValueMappingIt; + + bool WasInserted; + + // Iterate over the instructions contained in each candidate + unsigned SectionLength = A.getStartIdx() + A.getLength(); + for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; + ItA++, ItB++, Loc++) { + // Make sure the instructions are similar to one another. + if (!isClose(*ItA, *ItB)) + return false; + + Instruction *IA = ItA->Inst; + Instruction *IB = ItB->Inst; + + if (!ItA->Legal || !ItB->Legal) + return false; + + // Get the operand sets for the instructions. + ArrayRef OperValsA = ItA->OperVals; + ArrayRef OperValsB = ItB->OperVals; + + unsigned InstValA = A.ValueToNumber.find(IA)->second; + unsigned InstValB = B.ValueToNumber.find(IB)->second; + + // Ensure that the mappings for the instructions exists. + std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( + std::make_pair(InstValA, DenseSet({InstValB}))); + if (!WasInserted && ValueMappingIt->second.find(InstValB) == + ValueMappingIt->second.end()) + return false; + + std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( + std::make_pair(InstValB, DenseSet({InstValA}))); + if (!WasInserted && ValueMappingIt->second.find(InstValA) == + ValueMappingIt->second.end()) + return false; + + // TODO: Handle commutative instructions by mapping one operand to many + // operands instead only mapping a single operand to a single operand. + if (!compareOperandMapping({A, OperValsA, ValueNumberMappingA}, + {B, OperValsB, ValueNumberMappingB})) + return false; + } + + return true; +} + bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B) { auto DoesOverlap = [](const IRSimilarityCandidate &X, diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp index a30b9d5fdef4..4f5fcbb164e1 100644 --- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SourceMgr.h" +#include "gtest/gtest.h" using namespace llvm; using namespace IRSimilarity; @@ -1311,7 +1311,8 @@ TEST(IRInstructionMapper, RepeatedIllegalLength) { // A helper function that accepts an instruction list from a module made up of // two blocks of two legal instructions and terminator, and checks them for // instruction similarity. -static bool longSimCandCompare(std::vector &InstrList) { +static bool longSimCandCompare(std::vector &InstrList, + bool Structure = false) { std::vector::iterator Start, End; Start = InstrList.begin(); @@ -1326,6 +1327,8 @@ static bool longSimCandCompare(std::vector &InstrList) { std::advance(Start, 3); std::advance(End, 4); IRSimilarityCandidate Cand2(3, 2, *Start, *End); + if (Structure) + return IRSimilarityCandidate::compareStructure(Cand1, Cand2); return IRSimilarityCandidate::isSimilar(Cand1, Cand2); } @@ -1523,3 +1526,102 @@ TEST(IRSimilarityCandidate, IllegalInCandidate) { IRSimilarityCandidate Cand2(3, 3, *Start, *End); ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2)); } + +// Checks that different structure, in this case, where we introduce a new +// needed input in one region, is recognized as different. +TEST(IRSimilarityCandidate, DifferentStructure) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %a, %b + %3 = add i32 %b, %0 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList, true)); +} + +// Checks that the same structure is recognized between two candidates. The +// items %a and %b are used in the same way in both sets of instructions. +TEST(IRSimilarityCandidate, SameStructure) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = sub i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %a, %b + %3 = sub i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true)); +} + +// Checks that the same structure is recognized between two candidates. While +// the input names are reversed, they still perform the same overall operation. +TEST(IRSimilarityCandidate, DifferentNameSameStructure) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %b, %a + %3 = add i32 %a, %b + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList, true)); +}