From 6ada9e516f495eb6aa63e0c3e3edf3e3ae3c1a63 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Tue, 15 Sep 2020 17:30:31 -0500 Subject: [PATCH] [IRSim] Adding IRSimilarityCandidate that contains a region of IRInstructionData. The IRSimilarityCandidate is a container to hold a region of IRInstructions and offer interfaces for the starting instruction, ending instruction, parent function, length. It also assigns a global value number for each unique instance of a value in the region. It also contains an interface to compare two IRSimilarity as to whether they have the same sequence of similar instructions. Tests for whether the instructions are similar are found in unittests/Analysis/IRSimilarityIdentifierTest.cpp. Recommit of: 4944bb190fed8861d4d043eaf45e3c1e12aa2dc5 Differential Revision: https://reviews.llvm.org/D86970 --- .../llvm/Analysis/IRSimilarityIdentifier.h | 135 ++++++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 88 ++++ .../Analysis/IRSimilarityIdentifierTest.cpp | 450 ++++++++++++++++-- 3 files changed, 622 insertions(+), 51 deletions(-) diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 457b867f345e..8a74a5553083 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -37,6 +37,10 @@ // or comparison predicate. These are used to create a hash to map instructions // to integers to be used in similarity matching in sequences of instructions // +// Terminology: +// An IRSimilarityCandidate is a region of IRInstructionData (wrapped +// Instructions), usually used to denote a region of similarity has been found. +// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H @@ -386,6 +390,137 @@ struct IRInstructionMapper { InstructionClassification InstClassifier; }; +/// This is a class that wraps a range of IRInstructionData from one point to +/// another in the vector of IRInstructionData, which is a region of the +/// program. It is also responsible for defining the structure within this +/// region of instructions. +/// +/// The structure of a region is defined through a value numbering system +/// assigned to each unique value in a region at the creation of the +/// IRSimilarityCandidate. +/// +/// For example, for each Instruction we add a mapping for each new +/// value seen in that Instruction. +/// IR: Mapping Added: +/// %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 +/// %add2 = add i32 %a, %1 %add2 -> 4 +/// %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 +/// +/// We can compare IRSimilarityCandidates against one another. +/// The \ref isSimilar function compares each IRInstructionData against one +/// another and if we have the same sequences of IRInstructionData that would +/// create the same hash, we have similar IRSimilarityCandidates. +class IRSimilarityCandidate { +private: + /// The start index of this IRSimilarityCandidate in the instruction list. + unsigned StartIdx = 0; + + /// The number of instructions in this IRSimilarityCandidate. + unsigned Len = 0; + + /// The first instruction in this IRSimilarityCandidate. + IRInstructionData *FirstInst = nullptr; + + /// The last instruction in this IRSimilarityCandidate. + IRInstructionData *LastInst = nullptr; + + /// Global Value Numbering structures + /// @{ + /// Stores the mapping of the value to the number assigned to it in the + /// IRSimilarityCandidate. + DenseMap ValueToNumber; + /// Stores the mapping of the number to the value assigned this number. + DenseMap NumberToValue; + /// @} + +public: + /// \param StartIdx - The starting location of the region. + /// \param StartIdx - The length of the region. + /// \param FirstInstIt - The starting IRInstructionData of the region. + /// \param LastInstIt - The ending IRInstructionData of the region. + IRSimilarityCandidate(unsigned StartIdx, unsigned Len, + IRInstructionData *FirstInstIt, + IRInstructionData *LastInstIt); + + /// \param A - The first IRInstructionCandidate to compare. + /// \param B - The second IRInstructionCandidate to compare. + /// \returns True when every IRInstructionData in \p A is similar to every + /// IRInstructionData in \p B. + static bool isSimilar(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + /// Compare the start and end indices of the two IRSimilarityCandidates for + /// whether they overlap. If the start instruction of one + /// IRSimilarityCandidate is less than the end instruction of the other, and + /// the start instruction of one is greater than the start instruction of the + /// other, they overlap. + /// + /// \returns true if the IRSimilarityCandidates do not have overlapping + /// instructions. + static bool overlap(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B); + + /// \returns the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// \returns the start index of this IRSimilarityCandidate. + unsigned getStartIdx() const { return StartIdx; } + + /// \returns the end index of this IRSimilarityCandidate. + unsigned getEndIdx() const { return StartIdx + Len - 1; } + + /// \returns The first IRInstructionData. + IRInstructionData *front() const { return FirstInst; } + /// \returns The last IRInstructionData. + IRInstructionData *back() const { return LastInst; } + + /// \returns The first Instruction. + Instruction *frontInstruction() { return FirstInst->Inst; } + /// \returns The last Instruction + Instruction *backInstruction() { return LastInst->Inst; } + + /// \returns The BasicBlock the IRSimilarityCandidate starts in. + BasicBlock *getStartBB() { return FirstInst->Inst->getParent(); } + /// \returns The BasicBlock the IRSimilarityCandidate ends in. + BasicBlock *getEndBB() { return LastInst->Inst->getParent(); } + + /// \returns The Function that the IRSimilarityCandidate is located in. + Function *getFunction() { return getStartBB()->getParent(); } + + /// Finds the positive number associated with \p V if it has been mapped. + /// \param [in] V - the Value to find. + /// \returns The positive number corresponding to the value. + /// \returns None if not present. + Optional getGVN(Value *V) { + assert(V != nullptr && "Value is a nullptr?"); + DenseMap::iterator VNIt = ValueToNumber.find(V); + if (VNIt == ValueToNumber.end()) + return None; + return VNIt->second; + } + + /// Finds the Value associate with \p Num if it exists. + /// \param [in] Num - the number to find. + /// \returns The Value associated with the number. + /// \returns None if not present. + Optional fromGVN(unsigned Num) { + DenseMap::iterator VNIt = NumberToValue.find(Num); + if (VNIt == NumberToValue.end()) + return None; + assert(VNIt->second != nullptr && "Found value is a nullptr!"); + return VNIt->second; + } + + /// \param RHS -The IRSimilarityCandidate to compare against + /// \returns true if the IRSimilarityCandidate is occurs after the + /// IRSimilarityCandidate in the program. + bool operator<(const IRSimilarityCandidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } + + using iterator = IRInstructionDataList::iterator; + iterator begin() const { return iterator(front()); } + iterator end() const { return std::next(iterator(back())); } +}; } // end namespace IRSimilarity } // end namespace llvm diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 0db7a110f77d..de9869211de4 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -163,3 +163,91 @@ unsigned IRInstructionMapper::mapToIllegalUnsigned( return INumber; } + +IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, + IRInstructionData *FirstInstIt, + IRInstructionData *LastInstIt) + : StartIdx(StartIdx), Len(Len) { + + assert(FirstInstIt != nullptr && "Instruction is nullptr!"); + assert(LastInstIt != nullptr && "Instruction is nullptr!"); + assert(StartIdx + Len > StartIdx && + "Overflow for IRSimilarityCandidate range?"); + assert(Len - 1 == + std::distance(iterator(FirstInstIt), iterator(LastInstIt)) && + "Length of the first and last IRInstructionData do not match the " + "given length"); + + // We iterate over the given instructions, and map each unique value + // to a unique number in the IRSimilarityCandidate ValueToNumber and + // NumberToValue maps. A constant get its own value globally, the individual + // uses of the constants are not considered to be unique. + // + // IR: Mapping Added: + // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 + // %add2 = add i32 %a, %1 %add2 -> 4 + // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 + // + // when replace with global values, starting from 1, would be + // + // 3 = add i32 1, 2 + // 4 = add i32 1, 3 + // 6 = add i32 5, 2 + unsigned LocalValNumber = 1; + IRInstructionDataList::iterator ID = iterator(*FirstInstIt); + for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { + // Map the operand values to an unsigned integer if it does not already + // have an unsigned integer assigned to it. + for (Value *Arg : ID->OperVals) + if (ValueToNumber.find(Arg) == ValueToNumber.end()) { + ValueToNumber.try_emplace(Arg, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, Arg); + LocalValNumber++; + } + + // Mapping the instructions to an unsigned integer if it is not already + // exist in the mapping. + if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { + ValueToNumber.try_emplace(ID->Inst, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, ID->Inst); + LocalValNumber++; + } + } + + // Setting the first and last instruction data pointers for the candidate. If + // we got through the entire for loop without hitting an assert, we know + // that both of these instructions are not nullptrs. + FirstInst = FirstInstIt; + LastInst = LastInstIt; +} + +bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + if (A.getLength() != B.getLength()) + return false; + + auto InstrDataForBoth = + zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); + + return all_of(InstrDataForBoth, + [](std::tuple R) { + IRInstructionData &A = std::get<0>(R); + IRInstructionData &B = std::get<1>(R); + if (!A.Legal || !B.Legal) + return false; + return isClose(A, B); + }); +} + +bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, + const IRSimilarityCandidate &B) { + auto DoesOverlap = [](const IRSimilarityCandidate &X, + const IRSimilarityCandidate &Y) { + // Check: + // XXXXXX X starts before Y ends + // YYYYYYY Y starts after X starts + return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; + }; + + return DoesOverlap(A, B) || DoesOverlap(B, A); +} diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp index f79149b5eda2..a30b9d5fdef4 100644 --- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "gtest/gtest.h" #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SourceMgr.h" -#include "gtest/gtest.h" using namespace llvm; using namespace IRSimilarity; @@ -30,12 +30,9 @@ static std::unique_ptr makeLLVMModule(LLVMContext &Context, return M; } -void getVectors(Module &M, std::vector &InstrList, +void getVectors(Module &M, IRInstructionMapper &Mapper, + std::vector &InstrList, std::vector &UnsignedVec) { - SpecificBumpPtrAllocator InstDataAllocator; - SpecificBumpPtrAllocator IDLAllocator; - IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); - for (Function &F : M) for (BasicBlock &BB : F) Mapper.convertToUnsignedVec(BB, InstrList, UnsignedVec); @@ -56,7 +53,10 @@ TEST(IRInstructionMapper, OpcodeDifferentiation) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); // Check that the size of the unsigned vector and the instruction list are the // same as a safety check. @@ -84,7 +84,10 @@ TEST(IRInstructionMapper, OpcodeTypeSimilarity) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -109,7 +112,10 @@ TEST(IRInstructionMapper, TypeDifferentiation) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -131,7 +137,10 @@ TEST(IRInstructionMapper, PredicateDifferentiation) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -154,7 +163,10 @@ TEST(IRInstructionMapper, PredicateIsomorphism) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -176,7 +188,10 @@ TEST(IRInstructionMapper, PredicateSimilarity) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -199,7 +214,10 @@ TEST(IRInstructionMapper, FPPredicateSimilarity) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -222,7 +240,10 @@ TEST(IRInstructionMapper, FPPredicatDifference) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -245,7 +266,10 @@ TEST(IRInstructionMapper, ZextTypeSimilarity) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -268,7 +292,10 @@ TEST(IRInstructionMapper, SextTypeSimilarity) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -291,7 +318,10 @@ TEST(IRInstructionMapper, ZextTypeDifference) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -314,7 +344,10 @@ TEST(IRInstructionMapper, SextTypeDifference) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -337,7 +370,10 @@ TEST(IRInstructionMapper, LoadSimilarType) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -360,7 +396,10 @@ TEST(IRInstructionMapper, LoadDifferentType) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -383,7 +422,10 @@ TEST(IRInstructionMapper, LoadDifferentAlign) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -406,7 +448,10 @@ TEST(IRInstructionMapper, LoadDifferentVolatile) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -429,7 +474,10 @@ TEST(IRInstructionMapper, LoadSameVolatile) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -452,7 +500,10 @@ TEST(IRInstructionMapper, LoadDifferentAtomic) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -475,7 +526,10 @@ TEST(IRInstructionMapper, LoadSameAtomic) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -498,7 +552,10 @@ TEST(IRInstructionMapper, StoreSimilarType) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -521,7 +578,10 @@ TEST(IRInstructionMapper, StoreDifferentType) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -544,7 +604,10 @@ TEST(IRInstructionMapper, StoreDifferentAlign) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -567,7 +630,10 @@ TEST(IRInstructionMapper, StoreDifferentVolatile) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -590,7 +656,10 @@ TEST(IRInstructionMapper, StoreSameVolatile) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -613,7 +682,10 @@ TEST(IRInstructionMapper, StoreSameAtomic) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -636,7 +708,10 @@ TEST(IRInstructionMapper, StoreDifferentAtomic) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); ASSERT_TRUE(UnsignedVec.size() == 3); @@ -673,7 +748,10 @@ TEST(IRInstructionMapper, BranchIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -697,7 +775,10 @@ TEST(IRInstructionMapper, PhiIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -717,7 +798,10 @@ TEST(IRInstructionMapper, AllocaIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -741,7 +825,10 @@ TEST(IRInstructionMapper, GetElementPtrIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -763,7 +850,10 @@ TEST(IRInstructionMapper, CallIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -793,7 +883,10 @@ TEST(IRInstructionMapper, InvokeIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -823,7 +916,10 @@ TEST(IRInstructionMapper, CallBrInstIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -849,7 +945,10 @@ TEST(IRInstructionMapper, DebugInfoInvisible) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(3)); @@ -875,7 +974,10 @@ TEST(IRInstructionMapper, ExceptionHandlingTypeIdIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -904,7 +1006,10 @@ TEST(IRInstructionMapper, ExceptionHandlingExceptionCodeIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -926,7 +1031,10 @@ TEST(IRInstructionMapper, ExceptionHandlingUnwindIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -948,7 +1056,10 @@ TEST(IRInstructionMapper, ExceptionHandlingExceptionPointerIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -976,7 +1087,10 @@ TEST(IRInstructionMapper, CatchpadIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -1004,7 +1118,10 @@ TEST(IRInstructionMapper, CleanuppadIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(0)); @@ -1035,7 +1152,10 @@ TEST(IRInstructionMapper, MemSetIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(6)); @@ -1063,7 +1183,10 @@ TEST(IRInstructionMapper, MemCpyIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(6)); @@ -1091,7 +1214,10 @@ TEST(IRInstructionMapper, MemMoveIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(6)); @@ -1136,7 +1262,10 @@ TEST(IRInstructionMapper, VarArgsIllegal) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); ASSERT_EQ(UnsignedVec.size(), static_cast(16)); @@ -1166,7 +1295,10 @@ TEST(IRInstructionMapper, RepeatedIllegalLength) { std::vector InstrList; std::vector UnsignedVec; - getVectors(*M, InstrList, UnsignedVec); + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); // Check that the size of the unsigned vector and the instruction list are the // same as a safety check. @@ -1175,3 +1307,219 @@ TEST(IRInstructionMapper, RepeatedIllegalLength) { // Make sure that the unsigned vector is the expected size. ASSERT_TRUE(UnsignedVec.size() == 6); } + +// A helper function that accepts an instruction list from a module made up of +// two blocks of two legal instructions and terminator, and checks them for +// instruction similarity. +static bool longSimCandCompare(std::vector &InstrList) { + std::vector::iterator Start, End; + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(End, 1); + IRSimilarityCandidate Cand1(0, 2, *Start, *End); + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(Start, 3); + std::advance(End, 4); + IRSimilarityCandidate Cand2(3, 2, *Start, *End); + return IRSimilarityCandidate::isSimilar(Cand1, Cand2); +} + +// Checks that two adds with commuted operands are considered to be the same +// instructions. +TEST(IRSimilarityCandidate, CheckIdenticalInstructions) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(3)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + std::vector::iterator Start, End; + Start = InstrList.begin(); + End = InstrList.begin(); + std::advance(End, 1); + IRSimilarityCandidate Cand1(0, 2, *Start, *End); + IRSimilarityCandidate Cand2(0, 2, *Start, *End); + + ASSERT_TRUE(IRSimilarityCandidate::isSimilar(Cand1, Cand2)); +} + +// Checks that IRSimilarityCandidates wrapping these two regions of instructions +// are able to differentiate between instructions that have different opcodes. +TEST(IRSimilarityCandidate, CheckRegionsDifferentInstruction) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = sub i32 %a, %b + %3 = add i32 %b, %a + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList)); +} + +// Checks that IRSimilarityCandidates wrapping these two regions of instructions +// are able to differentiate between instructions that have different types. +TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i64 %c, %d + %3 = add i64 %d, %c + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_FALSE(longSimCandCompare(InstrList)); +} + +// Check that debug instructions do not impact similarity. They are marked as +// invisible. +TEST(IRSimilarityCandidate, IdenticalWithDebug) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + call void @llvm.dbg.value(metadata !0) + %1 = add i32 %b, %a + ret i32 0 + bb1: + %2 = add i32 %a, %b + call void @llvm.dbg.value(metadata !1) + %3 = add i32 %b, %a + ret i32 0 + bb2: + %4 = add i32 %a, %b + %5 = add i32 %b, %a + ret i32 0 + } + + declare void @llvm.dbg.value(metadata) + !0 = distinct !{!"test\00", i32 10} + !1 = distinct !{!"test\00", i32 11})"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(9)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + ASSERT_TRUE(longSimCandCompare(InstrList)); +} + +// Checks that IRSimilarityCandidates that include illegal instructions, are not +// considered to be the same set of instructions. In these sets of instructions +// the allocas are illegal. +TEST(IRSimilarityCandidate, IllegalInCandidate) { + StringRef ModuleString = R"( + define i32 @f(i32 %a, i32 %b) { + bb0: + %0 = add i32 %a, %b + %1 = add i32 %a, %b + %2 = alloca i32 + ret i32 0 + bb1: + %3 = add i32 %a, %b + %4 = add i32 %a, %b + %5 = alloca i32 + ret i32 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + getVectors(*M, Mapper, InstrList, UnsignedVec); + + // Check to make sure that we have a long enough region. + ASSERT_EQ(InstrList.size(), static_cast(6)); + // Check that the instructions were added correctly to both vectors. + ASSERT_TRUE(InstrList.size() == UnsignedVec.size()); + + std::vector::iterator Start, End; + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(End, 2); + IRSimilarityCandidate Cand1(0, 3, *Start, *End); + + Start = InstrList.begin(); + End = InstrList.begin(); + + std::advance(Start, 3); + std::advance(End, 5); + IRSimilarityCandidate Cand2(3, 3, *Start, *End); + ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2)); +}