Revert "[IRSim] Adding IRSimilarityCandidate that contains a region of IRInstructionData."

This reverts commit 4944bb190f.
This commit is contained in:
Andrew Litteken 2020-09-22 21:02:34 -05:00
parent 61ac58e10a
commit 88bc59c300
3 changed files with 0 additions and 425 deletions

View File

@ -37,10 +37,6 @@
// or comparison predicate. These are used to create a hash to map instructions
// to integers to be used in similarity matching in sequences of instructions
//
// Terminology:
// An IRSimilarityCandidate is a region of IRInstructionData (wrapped
// Instructions), usually used to denote a region of similarity has been found.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_IRSIMILARITYIDENTIFIER_H
@ -390,137 +386,6 @@ struct IRInstructionMapper {
InstructionClassification InstClassifier;
};
/// This is a class that wraps a range of IRInstructionData from one point to
/// another in the vector of IRInstructionData, which is a region of the
/// program. It is also responsible for defining the structure within this
/// region of instructions.
///
/// The structure of a region is defined through a value numbering system
/// assigned to each unique value in a region at the creation of the
/// IRSimilarityCandidate.
///
/// For example, for each Instruction we add a mapping for each new
/// value seen in that Instruction.
/// \verbatim
/// IR:                         Mapping Added:
/// %add1 = add i32 %a, c1      %add1 -> 3, %a -> 1, c1 -> 2
/// %add2 = add i32 %a, %add1   %add2 -> 4
/// %add3 = add i32 c2, c1      %add3 -> 6, c2 -> 5
/// \endverbatim
///
/// We can compare IRSimilarityCandidates against one another.
/// The \ref isSimilar function compares each IRInstructionData against one
/// another and if we have the same sequences of IRInstructionData that would
/// create the same hash, we have similar IRSimilarityCandidates.
class IRSimilarityCandidate {
private:
  /// The start index of this IRSimilarityCandidate in the instruction list.
  unsigned StartIdx = 0;

  /// The number of instructions in this IRSimilarityCandidate.
  unsigned Len = 0;

  /// The first instruction in this IRSimilarityCandidate.
  IRInstructionData *FirstInst = nullptr;

  /// The last instruction in this IRSimilarityCandidate.
  IRInstructionData *LastInst = nullptr;

  /// Global Value Numbering structures
  /// @{
  /// Stores the mapping of the value to the number assigned to it in the
  /// IRSimilarityCandidate.
  DenseMap<Value *, unsigned> ValueToNumber;
  /// Stores the mapping of the number to the value assigned this number.
  DenseMap<unsigned, Value *> NumberToValue;
  /// @}

public:
  /// \param StartIdx - The starting location of the region.
  /// \param Len - The length of the region.
  /// \param FirstInstIt - The starting IRInstructionData of the region.
  /// \param LastInstIt - The ending IRInstructionData of the region.
  IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
                        IRInstructionData *FirstInstIt,
                        IRInstructionData *LastInstIt);

  /// \param A - The first IRSimilarityCandidate to compare.
  /// \param B - The second IRSimilarityCandidate to compare.
  /// \returns True when both candidates have the same length and every
  /// IRInstructionData in \p A is legal and similar to the IRInstructionData
  /// at the same position in \p B.
  static bool isSimilar(const IRSimilarityCandidate &A,
                        const IRSimilarityCandidate &B);

  /// Compare the start and end indices of the two IRSimilarityCandidates for
  /// whether they overlap, i.e. whether the index ranges
  /// [getStartIdx(), getEndIdx()] of \p A and \p B intersect.
  ///
  /// \returns true if the IRSimilarityCandidates have overlapping
  /// instructions.
  static bool overlap(const IRSimilarityCandidate &A,
                      const IRSimilarityCandidate &B);

  /// \returns the number of instructions in this Candidate.
  unsigned getLength() const { return Len; }

  /// \returns the start index of this IRSimilarityCandidate.
  unsigned getStartIdx() const { return StartIdx; }

  /// \returns the end index of this IRSimilarityCandidate, that is the index
  /// of its last instruction (inclusive).
  unsigned getEndIdx() const { return StartIdx + Len - 1; }

  /// \returns The first IRInstructionData.
  IRInstructionData *front() const { return FirstInst; }

  /// \returns The last IRInstructionData.
  IRInstructionData *back() const { return LastInst; }

  /// \returns The first Instruction.
  Instruction *frontInstruction() const { return FirstInst->Inst; }

  /// \returns The last Instruction
  Instruction *backInstruction() const { return LastInst->Inst; }

  /// \returns The BasicBlock the IRSimilarityCandidate starts in.
  BasicBlock *getStartBB() const { return FirstInst->Inst->getParent(); }

  /// \returns The BasicBlock the IRSimilarityCandidate ends in.
  BasicBlock *getEndBB() const { return LastInst->Inst->getParent(); }

  /// \returns The Function that the IRSimilarityCandidate is located in.
  Function *getFunction() const { return getStartBB()->getParent(); }

  /// Finds the positive number associated with \p V if it has been mapped.
  /// \param [in] V - the Value to find.
  /// \returns The positive number corresponding to the value.
  /// \returns None if not present.
  Optional<unsigned> getGVN(Value *V) const {
    assert(V != nullptr && "Value is a nullptr?");
    DenseMap<Value *, unsigned>::const_iterator VNIt = ValueToNumber.find(V);
    if (VNIt == ValueToNumber.end())
      return None;
    return VNIt->second;
  }

  /// Finds the Value associated with \p Num if it exists.
  /// \param [in] Num - the number to find.
  /// \returns The Value associated with the number.
  /// \returns None if not present.
  Optional<Value *> fromGVN(unsigned Num) const {
    DenseMap<unsigned, Value *>::const_iterator VNIt = NumberToValue.find(Num);
    if (VNIt == NumberToValue.end())
      return None;
    assert(VNIt->second != nullptr && "Found value is a nullptr!");
    return VNIt->second;
  }

  /// Orders candidates by descending start index.
  /// \param RHS - The IRSimilarityCandidate to compare against.
  /// \returns true if this IRSimilarityCandidate starts after \p RHS in the
  /// program, i.e. its start index is strictly greater.
  bool operator<(const IRSimilarityCandidate &RHS) const {
    return getStartIdx() > RHS.getStartIdx();
  }

  using iterator = IRInstructionDataList::iterator;

  /// \returns an iterator positioned at the first IRInstructionData.
  iterator begin() const { return iterator(front()); }

  /// \returns an iterator positioned one past the last IRInstructionData.
  iterator end() const { return std::next(iterator(back())); }
};
} // end namespace IRSimilarity
} // end namespace llvm

View File

@ -163,91 +163,3 @@ unsigned IRInstructionMapper::mapToIllegalUnsigned(
return INumber;
}
IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
                                             IRInstructionData *FirstInstIt,
                                             IRInstructionData *LastInstIt)
    : StartIdx(StartIdx), Len(Len) {
  // Sanity-check the described region before touching any instruction data.
  assert(FirstInstIt != nullptr && "Instruction is nullptr!");
  assert(LastInstIt != nullptr && "Instruction is nullptr!");
  assert(StartIdx + Len > StartIdx &&
         "Overflow for IRSimilarityCandidate range?");
  assert(Len - 1 ==
             std::distance(iterator(FirstInstIt), iterator(LastInstIt)) &&
         "Length of the first and last IRInstructionData do not match the "
         "given length");

  // Remember the boundaries of the region. Both pointers were checked against
  // nullptr by the asserts above.
  FirstInst = FirstInstIt;
  LastInst = LastInstIt;

  // Walk the region once and hand every value a number the first time it is
  // seen, recording the association in both ValueToNumber and NumberToValue.
  // For each instruction the operands are numbered first (in operand order),
  // then the instruction itself. A value that already has a number, such as a
  // constant used several times, keeps it.
  //
  // IR:                         Mapping Added:
  // %add1 = add i32 %a, c1      %add1 -> 3, %a -> 1, c1 -> 2
  // %add2 = add i32 %a, %add1   %add2 -> 4
  // %add3 = add i32 c2, c1      %add3 -> 6, c2 -> 5
  //
  // Rewritten in terms of the assigned numbers, which start at 1, this is
  //
  // 3 = add i32 1, 2
  // 4 = add i32 1, 3
  // 6 = add i32 5, 2
  unsigned NextNumber = 1;

  // Gives Val the next free number unless it has been numbered before.
  auto NumberIfNew = [&](Value *Val) {
    if (!ValueToNumber.try_emplace(Val, NextNumber).second)
      return;
    NumberToValue.try_emplace(NextNumber, Val);
    ++NextNumber;
  };

  const unsigned EndIdx = StartIdx + Len;
  IRInstructionDataList::iterator DataIt = iterator(*FirstInstIt);
  for (unsigned Idx = StartIdx; Idx < EndIdx; ++Idx, ++DataIt) {
    for (Value *Operand : DataIt->OperVals)
      NumberIfNew(Operand);
    NumberIfNew(DataIt->Inst);
  }
}
bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
if (A.getLength() != B.getLength())
return false;
auto InstrDataForBoth =
zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
return all_of(InstrDataForBoth,
[](std::tuple<IRInstructionData &, IRInstructionData &> R) {
IRInstructionData &A = std::get<0>(R);
IRInstructionData &B = std::get<1>(R);
if (!A.Legal || !B.Legal)
return false;
return isClose(A, B);
});
}
bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
auto DoesOverlap = [](const IRSimilarityCandidate &X,
const IRSimilarityCandidate &Y) {
// Check:
// XXXXXX X starts before Y ends
// YYYYYYY Y starts after X starts
return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
};
return DoesOverlap(A, B) || DoesOverlap(B, A);
}

View File

@ -1175,205 +1175,3 @@ TEST(IRInstructionMapper, RepeatedIllegalLength) {
// Make sure that the unsigned vector is the expected size.
ASSERT_TRUE(UnsignedVec.size() == 6);
}
// Helper for modules made up of two blocks that each contribute two legal
// instructions followed by a terminator. It wraps elements [0, 1] and [3, 4]
// of the instruction list in IRSimilarityCandidates of length two and reports
// whether the candidates are similar.
static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList) {
  // First region: the two leading entries of the list.
  IRSimilarityCandidate FirstRegion(0, 2, InstrList[0], InstrList[1]);

  // Second region: the two entries starting at index 3.
  IRSimilarityCandidate SecondRegion(3, 2, InstrList[3], InstrList[4]);

  return IRSimilarityCandidate::isSimilar(FirstRegion, SecondRegion);
}
// Builds two IRSimilarityCandidates over the very same two-instruction region
// (a pair of adds whose operands are commuted with respect to each other) and
// checks that a region is reported as similar to itself.
TEST(IRSimilarityCandidate, CheckIdenticalInstructions) {
  StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
%1 = add i32 %b, %a
ret i32 0
})";
  LLVMContext Ctx;
  std::unique_ptr<Module> Mod = makeLLVMModule(Ctx, ModuleString);

  std::vector<IRInstructionData *> InstrData;
  std::vector<unsigned> MappedVals;
  getVectors(*Mod, InstrData, MappedVals);

  // The region must be long enough to build the candidates from.
  ASSERT_EQ(InstrData.size(), 3u);
  // Both output vectors must have been filled in step.
  ASSERT_TRUE(InstrData.size() == MappedVals.size());

  // Both candidates span elements 0 and 1 of the instruction list.
  IRInstructionData *RegionFirst = InstrData[0];
  IRInstructionData *RegionLast = InstrData[1];
  IRSimilarityCandidate Cand1(0, 2, RegionFirst, RegionLast);
  IRSimilarityCandidate Cand2(0, 2, RegionFirst, RegionLast);

  ASSERT_TRUE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
}
// Two regions that differ in a single opcode (an add in the first block, a sub
// in the second) must not be reported as similar.
TEST(IRSimilarityCandidate, CheckRegionsDifferentInstruction) {
  StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
%1 = add i32 %b, %a
ret i32 0
bb1:
%2 = sub i32 %a, %b
%3 = add i32 %b, %a
ret i32 0
})";
  LLVMContext Ctx;
  std::unique_ptr<Module> Mod = makeLLVMModule(Ctx, ModuleString);

  std::vector<IRInstructionData *> InstrData;
  std::vector<unsigned> MappedVals;
  getVectors(*Mod, InstrData, MappedVals);

  // The list must be long enough for the helper to carve out both regions.
  ASSERT_EQ(InstrData.size(), 6u);
  // Both output vectors must have been filled in step.
  ASSERT_TRUE(InstrData.size() == MappedVals.size());

  ASSERT_FALSE(longSimCandCompare(InstrData));
}
// Two regions with the same opcodes but different operand types (i32 adds in
// the first block, i64 adds in the second) must not be reported as similar.
TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) {
  StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b, i64 %c, i64 %d) {
bb0:
%0 = add i32 %a, %b
%1 = add i32 %b, %a
ret i32 0
bb1:
%2 = add i64 %c, %d
%3 = add i64 %d, %c
ret i32 0
})";
  LLVMContext Ctx;
  std::unique_ptr<Module> Mod = makeLLVMModule(Ctx, ModuleString);

  std::vector<IRInstructionData *> InstrData;
  std::vector<unsigned> MappedVals;
  getVectors(*Mod, InstrData, MappedVals);

  // The list must be long enough for the helper to carve out both regions.
  ASSERT_EQ(InstrData.size(), 6u);
  // Both output vectors must have been filled in step.
  ASSERT_TRUE(InstrData.size() == MappedVals.size());

  ASSERT_FALSE(longSimCandCompare(InstrData));
}
// Debug intrinsic calls placed between otherwise matching instructions must
// not affect similarity: the expected list size of nine (three per block)
// shows they contribute no entries, and the first two regions still compare
// as similar.
TEST(IRSimilarityCandidate, IdenticalWithDebug) {
  StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
call void @llvm.dbg.value(metadata !0)
%1 = add i32 %b, %a
ret i32 0
bb1:
%2 = add i32 %a, %b
call void @llvm.dbg.value(metadata !1)
%3 = add i32 %b, %a
ret i32 0
bb2:
%4 = add i32 %a, %b
%5 = add i32 %b, %a
ret i32 0
}
declare void @llvm.dbg.value(metadata)
!0 = distinct !{!"test\00", i32 10}
!1 = distinct !{!"test\00", i32 11})";
  LLVMContext Ctx;
  std::unique_ptr<Module> Mod = makeLLVMModule(Ctx, ModuleString);

  std::vector<IRInstructionData *> InstrData;
  std::vector<unsigned> MappedVals;
  getVectors(*Mod, InstrData, MappedVals);

  // The list must be long enough for the helper to carve out both regions.
  ASSERT_EQ(InstrData.size(), 9u);
  // Both output vectors must have been filled in step.
  ASSERT_TRUE(InstrData.size() == MappedVals.size());

  ASSERT_TRUE(longSimCandCompare(InstrData));
}
// Candidates that contain an illegal instruction must never be reported as
// similar, even when the two regions are otherwise instruction-for-instruction
// the same. Here the allocas are the illegal instructions.
TEST(IRSimilarityCandidate, IllegalInCandidate) {
  StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
%1 = add i32 %a, %b
%2 = alloca i32
ret i32 0
bb1:
%3 = add i32 %a, %b
%4 = add i32 %a, %b
%5 = alloca i32
ret i32 0
})";
  LLVMContext Ctx;
  std::unique_ptr<Module> Mod = makeLLVMModule(Ctx, ModuleString);

  std::vector<IRInstructionData *> InstrData;
  std::vector<unsigned> MappedVals;
  getVectors(*Mod, InstrData, MappedVals);

  // The list must be long enough to build both three-element regions.
  ASSERT_EQ(InstrData.size(), 6u);
  // Both output vectors must have been filled in step.
  ASSERT_TRUE(InstrData.size() == MappedVals.size());

  // First region: elements 0 through 2 of the instruction list.
  IRSimilarityCandidate Cand1(0, 3, InstrData[0], InstrData[2]);

  // Second region: elements 3 through 5 of the instruction list.
  IRSimilarityCandidate Cand2(3, 3, InstrData[3], InstrData[5]);

  ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
}