diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 7b81d5754930..300873f239f9 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -262,7 +262,20 @@ struct IRInstructionData llvm::hash_value(ID.Inst->getType()), llvm::hash_value(ID.getPredicate()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); - else if (isa<CallInst>(ID.Inst)) { + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) { + // To hash intrinsics, we use the opcode and types like the other + // instructions, but also the intrinsic ID and the name of the + // intrinsic. + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), + llvm::hash_value(*ID.CalleeName), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + } + + if (isa<CallInst>(ID.Inst)) { std::string FunctionName = *ID.CalleeName; return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), @@ -270,6 +283,7 @@ struct IRInstructionData llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); } + return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), @@ -525,8 +539,17 @@ struct IRInstructionMapper { // analyzed for similarity as it has no bearing on the outcome of the // program. InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } - // TODO: Handle specific intrinsics. - InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } + InstrType visitIntrinsicInst(IntrinsicInst &II) { + // These are disabled due to complications in the CodeExtractor when + // outlining these instructions. For instance, it is unclear what we + // should do when moving only the start or end lifetime instruction into + // an outlined function. Also, assume-like intrinsics could be removed + // from the region, removing arguments, causing discrepancies in the + // number of inputs between different regions. + if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic()) + return Illegal; + return EnableIntrinsics ? Legal : Illegal; + } // We only allow call instructions where the function has a name and // is not an indirect call. InstrType visitCallInst(CallInst &CI) { @@ -553,6 +576,10 @@ struct IRInstructionMapper { // The flag variable that lets the classifier know whether we should // allow indirect calls to be considered legal instructions. bool EnableIndirectCalls = false; + + // Flag that lets the classifier know whether we should allow intrinsics to + // be checked for similarity. + bool EnableIntrinsics = false; }; /// Maps an Instruction to a member of InstrType. @@ -939,10 +966,12 @@ class IRSimilarityIdentifier { public: IRSimilarityIdentifier(bool MatchBranches = true, bool MatchIndirectCalls = true, - bool MatchCallsWithName = false) + bool MatchCallsWithName = false, + bool MatchIntrinsics = true) : Mapper(&InstDataAllocator, &InstDataListAllocator), EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls), - EnableMatchingCallsByName(MatchCallsWithName) {} + EnableMatchingCallsByName(MatchCallsWithName), + EnableIntrinsics(MatchIntrinsics) {} private: /// Map the instructions in the module to unsigned integers, using mapping /// @@ -1031,6 +1060,10 @@ private: /// convention, attributes and type signature.
bool EnableMatchingCallsByName = true; + /// The flag variable that marks whether we should check intrinsics for + /// similarity. + bool EnableIntrinsics = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional<SimilarityGroupList> SimilarityCandidates; diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h index ed74c8ed0e96..1bd32fb78377 100644 --- a/llvm/include/llvm/Transforms/IPO/IROutliner.h +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -359,7 +359,7 @@ private: bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } // TODO: Handle specific intrinsics individually from those that can be // handled. - bool IntrinsicInst(IntrinsicInst &II) { return false; } + bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } // We only handle CallInsts that are not indirect, since we cannot guarantee // that they have a name in these cases. bool visitCallInst(CallInst &CI) { @@ -395,6 +395,10 @@ private: // The flag variable that marks whether we should allow indirect calls // to be outlined. bool EnableIndirectCalls = true; + + // The flag variable that marks whether we should allow intrinsic + // instructions to be outlined. + bool EnableIntrinsics = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index d2f0c57f6dab..b3bc3ddd6207 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -39,6 +39,10 @@ cl::opt<bool> MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match.")); + +cl::opt<bool> + DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, + cl::desc("Don't match or outline intrinsics")); } // namespace llvm IRInstructionData::IRInstructionData(Instruction &I, bool Legality, @@ -109,6 +113,24 @@ void IRInstructionData::setCalleeName(bool MatchByName) { assert(CI && "Instruction must be call"); CalleeName = ""; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + // To hash intrinsics, we use the opcode and types like the other + // instructions, but also the intrinsic ID and the name of the + // intrinsic. + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + FunctionType *FT = II->getFunctionType(); + // If there is an overloaded name, we have to use the complex version + // of getName to get the entire string. + if (Intrinsic::isOverloaded(IntrinsicID)) + CalleeName = + Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); + // If there is not an overloaded name, we only need to use this version.
+ else + CalleeName = Intrinsic::getName(IntrinsicID).str(); + + return; + } + if (!CI->isIndirectCall() && MatchByName) CalleeName = CI->getCalledFunction()->getName().str(); } @@ -1139,6 +1161,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( Mapper.InstClassifier.EnableBranches = this->EnableBranches; Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -1151,6 +1174,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { Mapper.InstClassifier.EnableBranches = this->EnableBranches; Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; std::vector<IRInstructionData *> InstrList; std::vector<unsigned> IntegerMapping; @@ -1172,7 +1196,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName)); + MatchCallsByName, !DisableIntrinsics)); return false; } @@ -1189,9 +1213,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, ModuleAnalysisManager &) { auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName); + MatchCallsByName, !DisableIntrinsics); IRSI.findSimilarity(M); return IRSI; } diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index e064fbbef595..faf7cb7d566a 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches; // A command flag to be used for debugging to indirect calls from similarity // matching and outlining. extern cl::opt<bool> DisableIndirectCalls; + +// A command flag to be used for debugging to exclude intrinsics from similarity +// matching and outlining. +extern cl::opt<bool> DisableIntrinsics; + } // namespace llvm // Set to true if the user wants the ir outliner to run on linkonceodr linkage @@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. InstructionClassifier.EnableBranches = !DisableBranches; InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls; + InstructionClassifier.EnableIntrinsics = !DisableIntrinsics; + IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); diff --git a/llvm/test/Transforms/IROutliner/different-intrinsics.ll b/llvm/test/Transforms/IROutliner/different-intrinsics.ll new file mode 100644 index 000000000000..a0fa2d2d5948 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/different-intrinsics.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we do not outline different intrinsics as the same +; function or as a value like we would for non-intrinsic functions.
+ +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* 
[[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_1( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[A]], i8* [[TMP2:%.*]], align 1 +; CHECK-NEXT: store i8 [[B]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll index 6c242d7b5e84..8bee43c77b84 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll index 0a9216425455..afc626f6abd2 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. diff --git a/llvm/test/Transforms/IROutliner/illegal-memset.ll b/llvm/test/Transforms/IROutliner/illegal-memset.ll index 4470d0c6d128..ed3eeb2d01b4 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memset.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memset intrinsics since it requires ; extra address space checks. diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll index 28a1e5994e70..eaffefe3d9d5 100644 --- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test ensures that we do not outline vararg instructions or intrinsics, as ; they may cause inconsistencies when outlining. 
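A usage note (this sketch is not part of the patch): the RUN-line updates above opt the illegal-* tests out of intrinsic matching with -no-ir-sim-intrinsics, while the new outline-* tests that follow rely on the default behaviour. The same switch can also be driven programmatically through the new MatchIntrinsics constructor parameter; below is a minimal sketch that assumes only the constructor signature and findSimilarity entry point shown in this patch (the helper function name is hypothetical).

```cpp
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IR/Module.h"

using namespace llvm;
using namespace llvm::IRSimilarity;

// Run the IR similarity analysis over a module with intrinsic matching
// disabled, mirroring what -no-ir-sim-intrinsics does for the passes.
static void runSimilarityWithoutIntrinsics(Module &M) {
  IRSimilarityIdentifier Identifier(/*MatchBranches=*/true,
                                    /*MatchIndirectCalls=*/true,
                                    /*MatchCallsWithName=*/false,
                                    /*MatchIntrinsics=*/false);
  // Each SimilarityGroup in the result is a set of structurally similar
  // candidate regions found across the module.
  SimilarityGroupList &Groups = Identifier.findSimilarity(M);
  (void)Groups; // Inspect or iterate over the groups here.
}
```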
diff --git a/llvm/test/Transforms/IROutliner/outline-memcpy.ll b/llvm/test/Transforms/IROutliner/outline-memcpy.ll new file mode 100644 index 000000000000..d5d4859c318e --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-memcpy.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memcpy instructions. + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outline-memmove.ll b/llvm/test/Transforms/IROutliner/outline-memmove.ll new file mode 100644 index 000000000000..45e573c52653 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-memmove.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memmove instructions.
+ +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outline-memset.ll b/llvm/test/Transforms/IROutliner/outline-memset.ll new file mode 100644 index 000000000000..65dd04978d0a --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-memset.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memset instructions. 
+ +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + +define i64 @function1(i64 %x, i64 %z, i64 %n) { +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} + +define i64 @function2(i64 %x, i64 %z, i64 %n) { +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false) +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]] +; CHECK-NEXT: [[C:%.*]] = add i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll new file mode 100644 index 000000000000..8e36335b3120 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical var arg +; intrinsics, but not the var arg instruction itself. + +declare void @llvm.va_start(i8*) +declare void @llvm.va_copy(i8*, i8*) +declare void @llvm.va_end(i8*) + +define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} + +define i32 @func2(i32 %a, double %b, i8* %v, ...)
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} +; CHECK-LABEL: @func1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]]) +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +; +; CHECK-LABEL: @func2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]]) +; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[TMP1]]) +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp index 97d9be3d8985..91a0594456a9 100644 --- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -1507,7 +1507,8 @@ TEST(IRInstructionMapper, CleanuppadIllegal) { // are considered illegal since is extra checking needed to handle the address // space checking. -// Checks that a memset instruction is mapped to an illegal value. +// Checks that a memset instruction is mapped to an illegal value when +// specified. 
TEST(IRInstructionMapper, MemSetIllegal) { StringRef ModuleString = R"( declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1531,6 +1532,7 @@ TEST(IRInstructionMapper, MemSetIllegal) { SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1538,7 +1540,8 @@ TEST(IRInstructionMapper, MemSetIllegal) { ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } -// Checks that a memcpy instruction is mapped to an illegal value. +// Checks that a memcpy instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemCpyIllegal) { StringRef ModuleString = R"( declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1562,6 +1565,7 @@ TEST(IRInstructionMapper, MemCpyIllegal) { SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1570,7 +1574,8 @@ TEST(IRInstructionMapper, MemCpyIllegal) { ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } -// Checks that a memmove instruction is mapped to an illegal value. +// Checks that a memmove instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemMoveIllegal) { StringRef ModuleString = R"( declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1594,6 +1599,7 @@ TEST(IRInstructionMapper, MemMoveIllegal) { SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1601,6 +1607,45 @@ TEST(IRInstructionMapper, MemMoveIllegal) { ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } +// Checks that mem* instructions are mapped to a legal value when not +// specified, and that all the intrinsics are marked differently.
+TEST(IRInstructionMapper, MemOpsLegal) { + StringRef ModuleString = R"( + declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString); + + std::vector<IRInstructionData *> InstrList; + std::vector<unsigned> UnsignedVec; + + SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; + SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = true; + getVectors(*M, Mapper, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(9)); + ASSERT_LT(UnsignedVec[2], UnsignedVec[3]); + ASSERT_LT(UnsignedVec[3], UnsignedVec[4]); + ASSERT_LT(UnsignedVec[4], UnsignedVec[5]); +} + // Checks that a variable argument instructions are mapped to an illegal value. // We exclude variable argument instructions since variable arguments // requires extra checking of the argument list. @@ -1642,6 +1687,7 @@ TEST(IRInstructionMapper, VarArgsIllegal) { SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator; SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size());