From 8de76bd569732acae6a10fdcb0152a49f7d4cd39 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Wed, 22 Dec 2021 18:07:43 -0600 Subject: [PATCH] [IRSim][IROutliner] Allowing Intrinsic Calls to be Used in Similarity Matching and Outlined Regions Due to some complications with lifetime, and assume-like intrinsics, intrinsics were not included as outlinable instructions. This patch opens up most intrinsics, excluding lifetime and assume-like intrinsics, to be outlined. For similarity, it is required that the intrinsic IDs, and the intrinsics names match exactly, as well as the function type. This puts intrinsics in a different class than normal call instructions (https://reviews.llvm.org/D109448), where the name will no longer have to match. This also adds an additional command line flag debug option to disable outlining intrinsics. Reviewers: paquette, jroelofs Differential Revision: https://reviews.llvm.org/D109450 --- .../llvm/Analysis/IRSimilarityIdentifier.h | 53 ++++++++++- llvm/include/llvm/Transforms/IPO/IROutliner.h | 6 +- llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 13 ++- llvm/lib/Transforms/IPO/IROutliner.cpp | 9 +- .../IROutliner/different-intrinsics.ll | 92 +++++++++++++++++++ .../Transforms/IROutliner/illegal-memcpy.ll | 2 +- .../Transforms/IROutliner/illegal-memmove.ll | 2 +- .../Transforms/IROutliner/illegal-memset.ll | 2 +- .../Transforms/IROutliner/illegal-vaarg.ll | 2 +- .../Transforms/IROutliner/outline-memcpy.ll | 60 ++++++++++++ .../Transforms/IROutliner/outline-memmove.ll | 60 ++++++++++++ .../Transforms/IROutliner/outline-memset.ll | 55 +++++++++++ .../IROutliner/outline-vaarg-intrinsic.ll | 90 ++++++++++++++++++ .../Analysis/IRSimilarityIdentifierTest.cpp | 52 ++++++++++- 14 files changed, 480 insertions(+), 18 deletions(-) create mode 100644 llvm/test/Transforms/IROutliner/different-intrinsics.ll create mode 100644 llvm/test/Transforms/IROutliner/outline-memcpy.ll create mode 100644 llvm/test/Transforms/IROutliner/outline-memmove.ll 
create mode 100644 llvm/test/Transforms/IROutliner/outline-memset.ll create mode 100644 llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 966bf02d128e..73dd3219aa13 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -252,7 +252,30 @@ struct IRInstructionData llvm::hash_value(ID.Inst->getType()), llvm::hash_value(ID.getPredicate()), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); - else if (CallInst *CI = dyn_cast(ID.Inst)) { + + if (IntrinsicInst *II = dyn_cast(ID.Inst)) { + // To hash intrinsics, we use the opcode, and types like the other + // instructions, but also, the Intrinsic ID, and the Name of the + // intrinsic. + Intrinsic::ID IntrinsicID = II->getIntrinsicID(); + FunctionType *FT = II->getFunctionType(); + std::string Name; + // If there is an overloaded name, we have to use the complex version + // of getName to get the entire string. + if (Intrinsic::isOverloaded(IntrinsicID)) + Name = + Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); + // If there is not an overloaded name, we only need to use this version. 
+ else + Name = Intrinsic::getName(IntrinsicID).str(); + return llvm::hash_combine( + llvm::hash_value(ID.Inst->getOpcode()), + llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID), + llvm::hash_value(Name), + llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); + } + + if (isa(ID.Inst)) { std::string FunctionName = *ID.CalleeName; return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), @@ -260,6 +283,7 @@ struct IRInstructionData llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName), llvm::hash_combine_range(OperTypes.begin(), OperTypes.end())); } + return llvm::hash_combine( llvm::hash_value(ID.Inst->getOpcode()), llvm::hash_value(ID.Inst->getType()), @@ -512,8 +536,17 @@ struct IRInstructionMapper { // analyzed for similarity as it has no bearing on the outcome of the // program. InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; } - // TODO: Handle specific intrinsics. - InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; } + InstrType visitIntrinsicInst(IntrinsicInst &II) { + // These are disabled due to complications in the CodeExtractor when + // outlining these instructions. For instance, it is unclear what we + // should do when moving only the start or end lifetime instruction into + // an outlined function. Also, assume-like intrinsics could be removed + // from the region, removing arguments, causing discrepancies in the + // number of inputs between different regions. + if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic()) + return Illegal; + return EnableIntrinsics ? Legal : Illegal; + } // We only allow call instructions where the function has a name and // is not an indirect call. InstrType visitCallInst(CallInst &CI) { @@ -540,6 +573,10 @@ struct IRInstructionMapper { // The flag variable that lets the classifier know whether we should // allow indirect calls to be considered legal instructions. 
bool EnableIndirectCalls = false; + + // Flag that lets the classifier know whether we should allow intrinsics to + // be checked for similarity. + bool EnableIntrinsics = false; }; /// Maps an Instruction to a member of InstrType. @@ -926,10 +963,12 @@ class IRSimilarityIdentifier { public: IRSimilarityIdentifier(bool MatchBranches = true, bool MatchIndirectCalls = true, - bool MatchCallsWithName = false) + bool MatchCallsWithName = false, + bool MatchIntrinsics = true) : Mapper(&InstDataAllocator, &InstDataListAllocator), EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls), - EnableMatchingCallsByName(MatchCallsWithName) {} + EnableMatchingCallsByName(MatchCallsWithName), + EnableIntrinsics(MatchIntrinsics) {} private: /// Map the instructions in the module to unsigned integers, using mapping @@ -1018,6 +1057,10 @@ private: /// convention, attributes and type signature. bool EnableMatchingCallsByName = true; + /// The flag variable that marks whether we should check intrinsics for + /// similarity. + bool EnableIntrinsics = true; + /// The SimilarityGroups found with the most recent run of \ref /// findSimilarity. None if there is no recent run. Optional SimilarityCandidates; diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h index 9799737a529e..0364fba86581 100644 --- a/llvm/include/llvm/Transforms/IPO/IROutliner.h +++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -360,7 +360,7 @@ private: bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } // TODO: Handle specific intrinsics individually from those that can be // handled. - bool IntrinsicInst(IntrinsicInst &II) { return false; } + bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; } // We only handle CallInsts that are not indirect, since we cannot guarantee // that they have a name in these cases. 
bool visitCallInst(CallInst &CI) { @@ -396,6 +396,10 @@ private: // The flag variable that marks whether we should allow indirect calls // to be outlined. bool EnableIndirectCalls = true; + + // The flag variable that marks whether we should allow intrinsics + // instructions to be outlined. + bool EnableIntrinsics = false; }; /// A InstVisitor used to exclude certain instructions from being outlined. diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 8c3c0400d601..aa5aadae6032 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -29,7 +29,6 @@ cl::opt cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes.")); -} // namespace llvm cl::opt DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), @@ -41,6 +40,9 @@ cl::opt cl::desc("only allow matching call instructions if the " "name and type signature match.")); +cl::opt + DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, + cl::desc("Don't match or outline intrinsics")); IRInstructionData::IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDList) @@ -48,6 +50,8 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality, initializeInstruction(); } +} // namespace llvm + void IRInstructionData::initializeInstruction() { // We check for whether we have a comparison instruction. 
If it is, we // find the "less than" version of the predicate for consistency for @@ -1103,6 +1107,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( Mapper.InstClassifier.EnableBranches = this->EnableBranches; Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; populateMapper(Modules, InstrList, IntegerMapping); findCandidates(InstrList, IntegerMapping); @@ -1115,6 +1120,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { Mapper.InstClassifier.EnableBranches = this->EnableBranches; Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; + Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; std::vector InstrList; std::vector IntegerMapping; @@ -1136,7 +1142,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName)); + MatchCallsByName, !DisableIntrinsics)); return false; } @@ -1153,9 +1159,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { AnalysisKey IRSimilarityAnalysis::Key; IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, ModuleAnalysisManager &) { - auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, - MatchCallsByName); + MatchCallsByName, !DisableIntrinsics); IRSI.findSimilarity(M); return IRSI; } diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 9c79972443fe..fa2dadbb2d0d 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -38,12 +38,17 @@ using namespace IRSimilarity; // matching and outlining. 
namespace llvm { extern cl::opt DisableBranches; -} // namespace llvm // A command flag to be used for debugging to indirect calls from similarity // matching and outlining. extern cl::opt DisableIndirectCalls; +// A command flag to be used for debugging to exclude intrinsics from similarity +// matching and outlining. +extern cl::opt DisableIntrinsics; + +} // namespace llvm + // Set to true if the user wants the ir outliner to run on linkonceodr linkage // functions. This is false by default because the linker can dedupe linkonceodr // functions. Since the outliner is confined to a single module (modulo LTO), @@ -2524,6 +2529,8 @@ unsigned IROutliner::doOutline(Module &M) { // Find the possible similarity sections. InstructionClassifier.EnableBranches = !DisableBranches; InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls; + InstructionClassifier.EnableIntrinsics = !DisableIntrinsics; + IRSimilarityIdentifier &Identifier = getIRSI(M); SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity(); diff --git a/llvm/test/Transforms/IROutliner/different-intrinsics.ll b/llvm/test/Transforms/IROutliner/different-intrinsics.ll new file mode 100644 index 000000000000..a0fa2d2d5948 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/different-intrinsics.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we do not outline different intrinsics as the same +; function or as a value like we would for non-intrinsic functions. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_1( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: 
entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[A]], i8* [[TMP2:%.*]], align 1 +; CHECK-NEXT: store i8 [[B]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll index 6c242d7b5e84..8bee43c77b84 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll index 0a9216425455..afc626f6abd2 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memcpy intrinsics since it may require ; extra address space checks. 
diff --git a/llvm/test/Transforms/IROutliner/illegal-memset.ll b/llvm/test/Transforms/IROutliner/illegal-memset.ll index 4470d0c6d128..ed3eeb2d01b4 100644 --- a/llvm/test/Transforms/IROutliner/illegal-memset.ll +++ b/llvm/test/Transforms/IROutliner/illegal-memset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test checks that we do not outline memset intrinsics since it requires ; extra address space checks. diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll index 28a1e5994e70..eaffefe3d9d5 100644 --- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll +++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s ; This test ensures that we do not outline vararg instructions or intrinsics, as ; they may cause inconsistencies when outlining. diff --git a/llvm/test/Transforms/IROutliner/outline-memcpy.ll b/llvm/test/Transforms/IROutliner/outline-memcpy.ll new file mode 100644 index 000000000000..d5d4859c318e --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-memcpy.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memcpy instructions. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], 
[[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outline-memmove.ll b/llvm/test/Transforms/IROutliner/outline-memmove.ll new file mode 100644 index 000000000000..45e573c52653 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-memmove.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memmove instructions. + +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: 
call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1 +; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false) +; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]] +; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1 +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outline-memset.ll b/llvm/test/Transforms/IROutliner/outline-memset.ll new file mode 100644 index 000000000000..65dd04978d0a --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-memset.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memset instructions. 
+ +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + +define i64 @function1(i64 %x, i64 %z, i64 %n) { +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} + +define i64 @function2(i64 %x, i64 %z, i64 %n) { +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +; +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false) +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0 +; CHECK-NEXT: [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]] +; CHECK-NEXT: [[C:%.*]] = add i64 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll new file mode 100644 index 
000000000000..8e36335b3120 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that we successfully outline identical memcpy var arg +; intrinsics, but not the var arg instruction itself. + +declare void @llvm.va_start(i8*) +declare void @llvm.va_copy(i8*, i8*) +declare void @llvm.va_end(i8*) + +define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} + +define i32 @func2(i32 %a, double %b, i8* %v, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} +; CHECK-LABEL: @func1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]]) +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +; +; CHECK-LABEL: @func2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 +; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]]) +; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], 
align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +; +; CHECK: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] +; CHECK: entry_to_outline: +; CHECK-NEXT: call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[TMP1]]) +; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void +; diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp index a8781e8f78af..fb2cc0792162 100644 --- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -1479,7 +1479,8 @@ TEST(IRInstructionMapper, CleanuppadIllegal) { // are considered illegal since is extra checking needed to handle the address // space checking. -// Checks that a memset instruction is mapped to an illegal value. +// Checks that a memset instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemSetIllegal) { StringRef ModuleString = R"( declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1503,6 +1504,7 @@ TEST(IRInstructionMapper, MemSetIllegal) { SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1510,7 +1512,8 @@ TEST(IRInstructionMapper, MemSetIllegal) { ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]); } -// Checks that a memcpy instruction is mapped to an illegal value. +// Checks that a memcpy instruction is mapped to an illegal value when +// specified. 
TEST(IRInstructionMapper, MemCpyIllegal) { StringRef ModuleString = R"( declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1534,6 +1537,7 @@ TEST(IRInstructionMapper, MemCpyIllegal) { SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1542,7 +1546,8 @@ TEST(IRInstructionMapper, MemCpyIllegal) { ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } -// Checks that a memmove instruction is mapped to an illegal value. +// Checks that a memmove instruction is mapped to an illegal value when +// specified. TEST(IRInstructionMapper, MemMoveIllegal) { StringRef ModuleString = R"( declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) @@ -1566,6 +1571,7 @@ TEST(IRInstructionMapper, MemMoveIllegal) { SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size()); @@ -1573,6 +1579,45 @@ TEST(IRInstructionMapper, MemMoveIllegal) { ASSERT_LT(UnsignedVec[2], UnsignedVec[0]); } +// Checks that mem* instructions are mapped to a legal value when not +// specified, and that all the intrinsics are marked differently. 
+TEST(IRInstructionMapper, MemOpsLegal) { + StringRef ModuleString = R"( + declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + + define i64 @function(i64 %x, i64 %z, i64 %n) { + entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 + })"; + LLVMContext Context; + std::unique_ptr M = makeLLVMModule(Context, ModuleString); + + std::vector InstrList; + std::vector UnsignedVec; + + SpecificBumpPtrAllocator InstDataAllocator; + SpecificBumpPtrAllocator IDLAllocator; + IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = true; + getVectors(*M, Mapper, InstrList, UnsignedVec); + + ASSERT_EQ(InstrList.size(), UnsignedVec.size()); + ASSERT_EQ(UnsignedVec.size(), static_cast(9)); + ASSERT_LT(UnsignedVec[2], UnsignedVec[3]); + ASSERT_LT(UnsignedVec[3], UnsignedVec[4]); + ASSERT_LT(UnsignedVec[4], UnsignedVec[5]); +} + // Checks that a variable argument instructions are mapped to an illegal value. // We exclude variable argument instructions since variable arguments // requires extra checking of the argument list. @@ -1614,6 +1659,7 @@ TEST(IRInstructionMapper, VarArgsIllegal) { SpecificBumpPtrAllocator InstDataAllocator; SpecificBumpPtrAllocator IDLAllocator; IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator); + Mapper.InstClassifier.EnableIntrinsics = false; getVectors(*M, Mapper, InstrList, UnsignedVec); ASSERT_EQ(InstrList.size(), UnsignedVec.size());