[IRSim] Adding structural comparison to IRSimilarityCandidate.

Just because sequences of instructions are similar to one another,
doesn't mean they are doing the same thing.

This introduces a structural check for the IRSimilarityCandidate that
compares two IRSimilarityCandidates against one another, and in each
instruction creates a mapping between the operands and results, or
checks that the existing mapping is valid.  If this check passes, it
means we have structurally similar IRSimilarityCandidates.

Tests for whether the candidates are found in
unittests/Analysis/IRSimilarityIdentifierTest.cpp.
This commit is contained in:
Andrew Litteken 2020-09-23 11:04:39 -05:00
parent 9c70281497
commit b27db2bb68
3 changed files with 312 additions and 2 deletions

View File

@ -410,6 +410,28 @@ struct IRInstructionMapper {
/// The \ref isSimilar function compares each IRInstructionData against one
/// another and if we have the same sequences of IRInstructionData that would
/// create the same hash, we have similar IRSimilarityCandidates.
///
/// We can also compare the structure of IRSimilarityCandidates. If we can
/// create a mapping of registers in the region contained by one
/// IRSimilarityCandidate to the region contained by different
/// IRSimilarityCandidate, they can be considered structurally similar.
///
/// IRSimilarityCandidate1: IRSimilarityCandidate2:
/// %add1 = add i32 %a, %b %add1 = add i32 %d, %e
/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
///
/// Can have the following mapping from candidate to candidate of:
/// %a -> %d, %b -> %e, %c -> %f, c1 -> c3, c2 -> c4
/// and can be considered similar.
///
/// IRSimilarityCandidate1: IRSimilarityCandidate2:
/// %add1 = add i32 %a, %b %add1 = add i32 %d, c4
/// %add2 = add i32 %a, %c %add2 = add i32 %d, %f
/// %add3 = add i32 c1, c2 %add3 = add i32 c3, c4
///
/// We cannot create the same mapping since the use of c4 is not used in the
/// same way as %b or c2.
class IRSimilarityCandidate {
private:
/// The start index of this IRSimilarityCandidate in the instruction list.
@ -448,6 +470,37 @@ public:
/// IRInstructionData in \p B.
static bool isSimilar(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
/// \param A - The first IRInstructionCandidate to compare.
/// \param B - The second IRInstructionCandidate to compare.
/// \returns True when every IRInstructionData in \p A is structurally similar
/// to \p B.
static bool compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
struct OperandMapping {
/// The IRSimilarityCandidate that holds the instruction the OperVals were
/// pulled from.
const IRSimilarityCandidate &IRSC;
/// The operand values to be analyzed.
ArrayRef<Value *> &OperVals;
/// The current mapping of global value numbers from one IRSimilarityCandidate
/// to another IRSimilarityCandidate.
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
};
/// Compare the operands in \p A and \p B and check that the current mapping
/// of global value numbers from \p A to \p B and \p B to \A is consistent.
///
/// \param A - The first IRInstructionCandidate, operand values, and current
/// operand mappings to compare.
/// \param B - The second IRInstructionCandidate, operand values, and current
/// operand mappings to compare.
/// \returns true if the IRSimilarityCandidates operands are compatible.
static bool compareOperandMapping(OperandMapping A, OperandMapping B);
/// Compare the start and end indices of the two IRSimilarityCandidates for
/// whether they overlap. If the start instruction of one
/// IRSimilarityCandidate is less than the end instruction of the other, and
@ -521,6 +574,7 @@ public:
iterator begin() const { return iterator(front()); }
iterator end() const { return std::next(iterator(back())); }
};
} // end namespace IRSimilarity
} // end namespace llvm

View File

@ -239,6 +239,160 @@ bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
});
}
/// Determine if operand number \p TargetArgVal is in the current mapping set
/// for operand number \p SourceArgVal.
///
/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
/// value numbers from source IRSimilarityCandidate to target
/// IRSimilarityCandidate.
/// \param [in] SourceArgVal The global value number for an operand in the
/// in the original candidate.
/// \param [in] TargetArgVal The global value number for the corresponding
/// operand in the other candidate.
/// \returns True if there exists a mapping and false if not.
bool checkNumberingAndReplace(
DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
unsigned SourceArgVal, unsigned TargetArgVal) {
// We are given two unsigned integers representing the global values of
// the operands in different IRSimilarityCandidates and a current mapping
// between the two.
//
// Source Operand GVN: 1
// Target Operand GVN: 2
// CurrentMapping: {1: {1, 2}}
//
// Since we have mapping, and the target operand is contained in the set, we
// update it to:
// CurrentMapping: {1: {2}}
// and can return true. But, if the mapping was
// CurrentMapping: {1: {3}}
// we would return false.
bool WasInserted;
DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
// If we created a new mapping, then we are done.
if (WasInserted)
return true;
// If there is more than one option in the mapping set, and the target value
// is included in the mapping set replace that set with one that only includes
// the target value, as it is the only valid mapping via the non commutative
// instruction.
DenseSet<unsigned> &TargetSet = Val->second;
if (TargetSet.size() > 1 && TargetSet.find(TargetArgVal) != TargetSet.end()) {
TargetSet.clear();
TargetSet.insert(TargetArgVal);
return true;
}
// Return true if we can find the value in the set.
return TargetSet.find(TargetArgVal) != TargetSet.end();
}
bool IRSimilarityCandidate::compareOperandMapping(OperandMapping A,
OperandMapping B) {
// Iterators to keep track of where we are in the operands for each
// Instruction.
ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
unsigned OperandLength = A.OperVals.size();
// For each operand, get the value numbering and ensure it is consistent.
for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
// Attempt to add a set with only the target value. If there is no mapping
// we can create it here.
//
// For an instruction like a subtraction:
// IRSimilarityCandidateA: IRSimilarityCandidateB:
// %resultA = sub %a, %b %resultB = sub %d, %e
//
// We map %a -> %d and %b -> %e.
//
// And check to see whether their mapping is consistent in
// checkNumberingAndReplace.
if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
return false;
if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
return false;
}
return true;
}
bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
if (A.getLength() != B.getLength())
return false;
if (A.ValueToNumber.size() != B.ValueToNumber.size())
return false;
iterator ItA = A.begin();
iterator ItB = B.begin();
// These sets create a create a mapping between the values in one candidate
// to values in the other candidate. If we create a set with one element,
// and that same element maps to the original element in the candidate
// we have a good mapping.
DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
bool WasInserted;
// Iterate over the instructions contained in each candidate
unsigned SectionLength = A.getStartIdx() + A.getLength();
for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
ItA++, ItB++, Loc++) {
// Make sure the instructions are similar to one another.
if (!isClose(*ItA, *ItB))
return false;
Instruction *IA = ItA->Inst;
Instruction *IB = ItB->Inst;
if (!ItA->Legal || !ItB->Legal)
return false;
// Get the operand sets for the instructions.
ArrayRef<Value *> OperValsA = ItA->OperVals;
ArrayRef<Value *> OperValsB = ItB->OperVals;
unsigned InstValA = A.ValueToNumber.find(IA)->second;
unsigned InstValB = B.ValueToNumber.find(IB)->second;
// Ensure that the mappings for the instructions exists.
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
if (!WasInserted && ValueMappingIt->second.find(InstValB) ==
ValueMappingIt->second.end())
return false;
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
if (!WasInserted && ValueMappingIt->second.find(InstValA) ==
ValueMappingIt->second.end())
return false;
// TODO: Handle commutative instructions by mapping one operand to many
// operands instead only mapping a single operand to a single operand.
if (!compareOperandMapping({A, OperValsA, ValueNumberMappingA},
{B, OperValsB, ValueNumberMappingB}))
return false;
}
return true;
}
bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
auto DoesOverlap = [](const IRSimilarityCandidate &X,

View File

@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
#include "gtest/gtest.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
using namespace llvm;
using namespace IRSimilarity;
@ -1311,7 +1311,8 @@ TEST(IRInstructionMapper, RepeatedIllegalLength) {
// A helper function that accepts an instruction list from a module made up of
// two blocks of two legal instructions and terminator, and checks them for
// instruction similarity.
static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList) {
static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList,
bool Structure = false) {
std::vector<IRInstructionData *>::iterator Start, End;
Start = InstrList.begin();
@ -1326,6 +1327,8 @@ static bool longSimCandCompare(std::vector<IRInstructionData *> &InstrList) {
std::advance(Start, 3);
std::advance(End, 4);
IRSimilarityCandidate Cand2(3, 2, *Start, *End);
if (Structure)
return IRSimilarityCandidate::compareStructure(Cand1, Cand2);
return IRSimilarityCandidate::isSimilar(Cand1, Cand2);
}
@ -1523,3 +1526,102 @@ TEST(IRSimilarityCandidate, IllegalInCandidate) {
IRSimilarityCandidate Cand2(3, 3, *Start, *End);
ASSERT_FALSE(IRSimilarityCandidate::isSimilar(Cand1, Cand2));
}
// Checks that different structure, in this case, where we introduce a new
// needed input in one region, is recognized as different.
TEST(IRSimilarityCandidate, DifferentStructure) {
StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
%1 = add i32 %b, %a
ret i32 0
bb1:
%2 = add i32 %a, %b
%3 = add i32 %b, %0
ret i32 0
})";
LLVMContext Context;
std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> UnsignedVec;
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
getVectors(*M, Mapper, InstrList, UnsignedVec);
// Check to make sure that we have a long enough region.
ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
// Check that the instructions were added correctly to both vectors.
ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
ASSERT_FALSE(longSimCandCompare(InstrList, true));
}
// Checks that the same structure is recognized between two candidates. The
// items %a and %b are used in the same way in both sets of instructions.
TEST(IRSimilarityCandidate, SameStructure) {
StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
%1 = sub i32 %b, %a
ret i32 0
bb1:
%2 = add i32 %a, %b
%3 = sub i32 %b, %a
ret i32 0
})";
LLVMContext Context;
std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> UnsignedVec;
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
getVectors(*M, Mapper, InstrList, UnsignedVec);
// Check to make sure that we have a long enough region.
ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
// Check that the instructions were added correctly to both vectors.
ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
ASSERT_TRUE(longSimCandCompare(InstrList, true));
}
// Checks that the same structure is recognized between two candidates. While
// the input names are reversed, they still perform the same overall operation.
TEST(IRSimilarityCandidate, DifferentNameSameStructure) {
StringRef ModuleString = R"(
define i32 @f(i32 %a, i32 %b) {
bb0:
%0 = add i32 %a, %b
%1 = add i32 %b, %a
ret i32 0
bb1:
%2 = add i32 %b, %a
%3 = add i32 %a, %b
ret i32 0
})";
LLVMContext Context;
std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> UnsignedVec;
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
getVectors(*M, Mapper, InstrList, UnsignedVec);
// Check to make sure that we have a long enough region.
ASSERT_EQ(InstrList.size(), static_cast<unsigned>(6));
// Check that the instructions were added correctly to both vectors.
ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
ASSERT_TRUE(longSimCandCompare(InstrList, true));
}