[IRSim][IROutliner] Allowing Intrinsic Calls to be Used in Similarity Matching and Outlined Regions

Due to complications with lifetime and assume-like intrinsics, intrinsics were previously excluded from the set of outlinable instructions. This patch opens up most intrinsics to outlining, still excluding lifetime and assume-like intrinsics. For similarity, the intrinsic IDs, the intrinsic names, and the function types must all match exactly. This puts intrinsics in a different class than normal call instructions (https://reviews.llvm.org/D109448), where the name no longer has to match.
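As a minimal sketch of this matching rule (the helper name is an assumption for illustration, not code from this patch):

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Two intrinsic call sites are treated as similar only if the intrinsic ID,
// the (possibly mangled) intrinsic name, and the function type all agree.
static bool intrinsicsMatch(const IntrinsicInst &A, const IntrinsicInst &B) {
  return A.getIntrinsicID() == B.getIntrinsicID() &&
         A.getCalledFunction()->getName() ==
             B.getCalledFunction()->getName() &&
         A.getFunctionType() == B.getFunctionType();
}

Under this rule two llvm.memcpy calls with identical signatures can match, but a memcpy call never matches a memmove call, even when every operand type lines up.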

This also adds a command-line debug flag to disable matching and outlining of intrinsics.
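The updated regression tests below exercise the flag in their RUN lines; a typical invocation (the input filename is a placeholder) is:

opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < input.ll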

Recommit of: 8de76bd569
Adds extra checking of intrinsic call names to avoid taking the address of intrinsic calls when extracting function calls.
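A minimal sketch of the kind of guard this implies, with a hypothetical helper name (the patch's actual check lives in the outliner/extractor code below):

// Intrinsics cannot have their address taken, so a callee that is an
// intrinsic must remain a direct call rather than become an argument to
// the extracted (outlined) function.
static bool calleeMustStayDirect(const llvm::CallInst &CI) {
  const llvm::Function *Callee = CI.getCalledFunction();
  return Callee && Callee->isIntrinsic();
}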

Reviewers: paquette, jroelofs

Differential Revision: https://reviews.llvm.org/D109450
Author: Andrew Litteken
Date: 2021-12-22 18:07:43 -06:00
Parent: 33b38339a0
Commit: 3785c1d055
14 changed files with 486 additions and 16 deletions


@@ -262,7 +262,20 @@ struct IRInstructionData
llvm::hash_value(ID.Inst->getType()),
llvm::hash_value(ID.getPredicate()),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
else if (isa<CallInst>(ID.Inst)) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
// To hash intrinsics, we use the opcode and types like the other
// instructions, but also the intrinsic ID and the name of the
// intrinsic.
Intrinsic::ID IntrinsicID = II->getIntrinsicID();
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
llvm::hash_value(*ID.CalleeName),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
}
if (isa<CallInst>(ID.Inst)) {
std::string FunctionName = *ID.CalleeName;
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
@@ -270,6 +283,7 @@ struct IRInstructionData
llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
}
return llvm::hash_combine(
llvm::hash_value(ID.Inst->getOpcode()),
llvm::hash_value(ID.Inst->getType()),
@@ -525,8 +539,17 @@ struct IRInstructionMapper {
// analyzed for similarity as it has no bearing on the outcome of the
// program.
InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
// TODO: Handle specific intrinsics.
InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
InstrType visitIntrinsicInst(IntrinsicInst &II) {
// These are disabled due to complications in the CodeExtractor when
// outlining these instructions. For instance, it is unclear what we
// should do when moving only the start or end lifetime instruction into
// an outlined function. Also, assume-like intrinsics could be removed
// from the region, removing arguments, causing discrepancies in the
// number of inputs between different regions.
if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
return Illegal;
return EnableIntrinsics ? Legal : Illegal;
}
// We only allow call instructions where the function has a name and
// is not an indirect call.
InstrType visitCallInst(CallInst &CI) {
@@ -553,6 +576,10 @@ struct IRInstructionMapper {
// The flag variable that lets the classifier know whether we should
// allow indirect calls to be considered legal instructions.
bool EnableIndirectCalls = false;
// Flag that lets the classifier know whether we should allow intrinsics to
// be checked for similarity.
bool EnableIntrinsics = false;
};
/// Maps an Instruction to a member of InstrType.
@@ -939,10 +966,12 @@ class IRSimilarityIdentifier {
public:
IRSimilarityIdentifier(bool MatchBranches = true,
bool MatchIndirectCalls = true,
bool MatchCallsWithName = false)
bool MatchCallsWithName = false,
bool MatchIntrinsics = true)
: Mapper(&InstDataAllocator, &InstDataListAllocator),
EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
EnableMatchingCallsByName(MatchCallsWithName) {}
EnableMatchingCallsByName(MatchCallsWithName),
EnableIntrinsics(MatchIntrinsics) {}
private:
/// Map the instructions in the module to unsigned integers, using mapping
@@ -1031,6 +1060,10 @@ private:
/// convention, attributes and type signature.
bool EnableMatchingCallsByName = true;
/// The flag variable that marks whether we should check intrinsics for
/// similarity.
bool EnableIntrinsics = true;
/// The SimilarityGroups found with the most recent run of \ref
/// findSimilarity. None if there is no recent run.
Optional<SimilarityGroupList> SimilarityCandidates;
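A minimal usage sketch of the extended constructor, assuming M is an already-parsed llvm::Module; passing false as the new fourth argument reproduces the old behavior of skipping intrinsics:

IRSimilarityIdentifier IRSI(/*MatchBranches=*/true,
                            /*MatchIndirectCalls=*/true,
                            /*MatchCallsWithName=*/false,
                            /*MatchIntrinsics=*/false);
SimilarityGroupList &Groups = IRSI.findSimilarity(M); // groups of similar regions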


@@ -359,7 +359,7 @@ private:
bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
// TODO: Handle specific intrinsics individually from those that can be
// handled.
bool visitIntrinsicInst(IntrinsicInst &II) { return false; }
bool visitIntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
// We only handle CallInsts that are not indirect, since we cannot guarantee
// that they have a name in these cases.
bool visitCallInst(CallInst &CI) {
@@ -395,6 +395,10 @@ private:
// The flag variable that marks whether we should allow indirect calls
// to be outlined.
bool EnableIndirectCalls = true;
// The flag variable that marks whether we should allow intrinsic
// instructions to be outlined.
bool EnableIntrinsics = false;
};
/// A InstVisitor used to exclude certain instructions from being outlined.


@@ -39,6 +39,10 @@ cl::opt<bool>
MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
cl::desc("only allow matching call instructions if the "
"name and type signature match."));
cl::opt<bool>
DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
cl::desc("Don't match or outline intrinsics"));
} // namespace llvm
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
@@ -109,6 +113,24 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
assert(CI && "Instruction must be call");
CalleeName = "";
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
// To hash intrinsics, we use the opcode and types like the other
// instructions, but also the intrinsic ID and the name of the
// intrinsic.
Intrinsic::ID IntrinsicID = II->getIntrinsicID();
FunctionType *FT = II->getFunctionType();
// If there is an overloaded name, we have to use the complex version
// of getName to get the entire string.
if (Intrinsic::isOverloaded(IntrinsicID))
CalleeName =
Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
// If there is not an overloaded name, we only need to use this version.
else
CalleeName = Intrinsic::getName(IntrinsicID).str();
return;
}
if (!CI->isIndirectCall() && MatchByName)
CalleeName = CI->getCalledFunction()->getName().str();
}
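For a concrete feel of what this stores (the mangled name below follows the standard intrinsic naming scheme and is an assumption, not something this patch asserts): an overloaded intrinsic such as memcpy takes the first branch, so the recorded callee name keeps its full type suffixes and cannot collide with a memmove call of the same FunctionType:

// Sketch, assuming II calls the i8*/i8*/i64 memcpy variant used in the
// tests below.
Intrinsic::ID IID = II->getIntrinsicID();   // Intrinsic::memcpy
FunctionType *FT = II->getFunctionType();
std::string Name =
    Intrinsic::isOverloaded(IID)            // true for memcpy
        ? Intrinsic::getName(IID, FT->params(), II->getModule(), FT)
        : Intrinsic::getName(IID).str();
// Expected: Name == "llvm.memcpy.p0i8.p0i8.i64" for this call site.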
@@ -1139,6 +1161,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
@@ -1151,6 +1174,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
@@ -1172,7 +1196,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
MatchCallsByName));
MatchCallsByName, !DisableIntrinsics));
return false;
}
@@ -1189,9 +1213,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
ModuleAnalysisManager &) {
auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
MatchCallsByName);
MatchCallsByName, !DisableIntrinsics);
IRSI.findSimilarity(M);
return IRSI;
}


@@ -42,6 +42,11 @@ extern cl::opt<bool> DisableBranches;
// A command flag to be used for debugging to exclude indirect calls from
// similarity matching and outlining.
extern cl::opt<bool> DisableIndirectCalls;
// A command flag to be used for debugging to exclude intrinsics from similarity
// matching and outlining.
extern cl::opt<bool> DisableIntrinsics;
} // namespace llvm
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
@@ -2610,6 +2615,8 @@ unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
InstructionClassifier.EnableBranches = !DisableBranches;
InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();


@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we do not outline different intrinsics as the same
; function or as a value like we would for non-intrinsic functions.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
entry:
%a = load i8, i8* %s
%b = load i8, i8* %d
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
%c = add i8 %a, %b
%ret = load i8, i8* %s
ret i8 %ret
}
define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
entry:
%a = load i8, i8* %s
%b = load i8, i8* %d
call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
%c = add i8 %a, %b
%ret = load i8, i8* %s
ret i8 %ret
}
; CHECK-LABEL: @function1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
;
;
; CHECK-LABEL: @function2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
;
;
; CHECK-LABEL: define internal void @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
; CHECK-NEXT: [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]]
; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
; CHECK: entry_after_outline.exitStub:
; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @outlined_ir_func_1(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
; CHECK: entry_after_outline.exitStub:
; CHECK-NEXT: store i8 [[A]], i8* [[TMP2:%.*]], align 1
; CHECK-NEXT: store i8 [[B]], i8* [[TMP3:%.*]], align 1
; CHECK-NEXT: ret void
;


@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
; This test checks that we do not outline memcpy intrinsics since they may
; require extra address space checks.


@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
; This test checks that we do not outline memmove intrinsics since they may
; require extra address space checks.


@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
; This test checks that we do not outline memset intrinsics since they require
; extra address space checks.


@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
; This test ensures that we do not outline vararg instructions or intrinsics, as
; they may cause inconsistencies when outlining.


@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we successfully outline identical memcpy instructions.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
entry:
%a = load i8, i8* %s
%b = load i8, i8* %d
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
%c = add i8 %a, %b
%ret = load i8, i8* %s
ret i8 %ret
}
define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
entry:
%a = load i8, i8* %s
%b = load i8, i8* %d
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
%c = add i8 %a, %b
%ret = load i8, i8* %s
ret i8 %ret
}
; CHECK-LABEL: @function1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
;
;
; CHECK-LABEL: @function2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
;
;
; CHECK: define internal void @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]]
; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
; CHECK: entry_after_outline.exitStub:
; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1
; CHECK-NEXT: ret void
;


@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we successfully outline identical memmove instructions.
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
entry:
%a = load i8, i8* %s
%b = load i8, i8* %d
call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
%c = add i8 %a, %b
%ret = load i8, i8* %s
ret i8 %ret
}
define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
entry:
%a = load i8, i8* %s
%b = load i8, i8* %d
call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
%c = add i8 %a, %b
%ret = load i8, i8* %s
ret i8 %ret
}
; CHECK-LABEL: @function1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
;
;
; CHECK-LABEL: @function2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
;
;
; CHECK: define internal void @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
; CHECK-NEXT: [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
; CHECK-NEXT: [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
; CHECK-NEXT: [[C:%.*]] = add i8 [[A]], [[B]]
; CHECK-NEXT: [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
; CHECK: entry_after_outline.exitStub:
; CHECK-NEXT: store i8 [[RET]], i8* [[TMP3:%.*]], align 1
; CHECK-NEXT: ret void
;


@@ -0,0 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we successfully outline identical memset instructions.
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
define i64 @function1(i64 %x, i64 %z, i64 %n) {
entry:
%pool = alloca [59 x i64], align 4
%tmp = bitcast [59 x i64]* %pool to i8*
call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
%cmp3 = icmp eq i64 %n, 0
%a = add i64 %x, %z
%c = add i64 %x, %z
ret i64 0
}
define i64 @function2(i64 %x, i64 %z, i64 %n) {
entry:
%pool = alloca [59 x i64], align 4
%tmp = bitcast [59 x i64]* %pool to i8*
call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
%cmp3 = icmp eq i64 %n, 0
%a = add i64 %x, %z
%c = add i64 %x, %z
ret i64 0
}
; CHECK-LABEL: @function1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4
; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
; CHECK-NEXT: ret i64 0
;
;
; CHECK-LABEL: @function2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4
; CHECK-NEXT: call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
; CHECK-NEXT: ret i64 0
;
;
; CHECK: define internal void @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
; CHECK-NEXT: [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false)
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0
; CHECK-NEXT: [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]]
; CHECK-NEXT: [[C:%.*]] = add i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
; CHECK: entry_after_outline.exitStub:
; CHECK-NEXT: ret void
;


@@ -0,0 +1,90 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
; This test checks that we successfully outline identical variable argument
; intrinsics, but not the va_arg instruction itself.
declare void @llvm.va_start(i8*)
declare void @llvm.va_copy(i8*, i8*)
declare void @llvm.va_end(i8*)
define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca double, align 8
%ap = alloca i8*, align 4
%c = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store double %b, double* %b.addr, align 8
%ap1 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap1)
%0 = va_arg i8** %ap, i32
call void @llvm.va_copy(i8* %v, i8* %ap1)
call void @llvm.va_end(i8* %ap1)
store i32 %0, i32* %c, align 4
%tmp = load i32, i32* %c, align 4
ret i32 %tmp
}
define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind {
entry:
%a.addr = alloca i32, align 4
%b.addr = alloca double, align 8
%ap = alloca i8*, align 4
%c = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
store double %b, double* %b.addr, align 8
%ap1 = bitcast i8** %ap to i8*
call void @llvm.va_start(i8* %ap1)
%0 = va_arg i8** %ap, i32
call void @llvm.va_copy(i8* %v, i8* %ap1)
call void @llvm.va_end(i8* %ap1)
store i32 %0, i32* %c, align 4
%ap2 = bitcast i8** %ap to i8*
%tmp = load i32, i32* %c, align 4
ret i32 %tmp
}
; CHECK-LABEL: @func1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]])
; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32
; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4
; CHECK-NEXT: ret i32 [[TMP]]
;
;
; CHECK-LABEL: @func2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8
; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
; CHECK-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8
; CHECK-NEXT: [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1]])
; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32
; CHECK-NEXT: call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8*
; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4
; CHECK-NEXT: ret i32 [[TMP]]
;
;
; CHECK: define internal void @outlined_ir_func_0(
; CHECK-NEXT: newFuncRoot:
; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]]
; CHECK: entry_to_outline:
; CHECK-NEXT: call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]])
; CHECK-NEXT: call void @llvm.va_end(i8* [[TMP1]])
; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4
; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
; CHECK: entry_after_outline.exitStub:
; CHECK-NEXT: ret void
;


@@ -1507,7 +1507,8 @@ TEST(IRInstructionMapper, CleanuppadIllegal) {
// are considered illegal since there is extra checking needed to handle the
// address space checking.
// Checks that a memset instruction is mapped to an illegal value.
// Checks that a memset instruction is mapped to an illegal value when
// specified.
TEST(IRInstructionMapper, MemSetIllegal) {
StringRef ModuleString = R"(
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1531,6 +1532,7 @@ TEST(IRInstructionMapper, MemSetIllegal) {
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
Mapper.InstClassifier.EnableIntrinsics = false;
getVectors(*M, Mapper, InstrList, UnsignedVec);
ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1538,7 +1540,8 @@ TEST(IRInstructionMapper, MemSetIllegal) {
ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]);
}
// Checks that a memcpy instruction is mapped to an illegal value.
// Checks that a memcpy instruction is mapped to an illegal value when
// specified.
TEST(IRInstructionMapper, MemCpyIllegal) {
StringRef ModuleString = R"(
declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1562,6 +1565,7 @@ TEST(IRInstructionMapper, MemCpyIllegal) {
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
Mapper.InstClassifier.EnableIntrinsics = false;
getVectors(*M, Mapper, InstrList, UnsignedVec);
ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1570,7 +1574,8 @@ TEST(IRInstructionMapper, MemCpyIllegal) {
ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
}
// Checks that a memmove instruction is mapped to an illegal value.
// Checks that a memmove instruction is mapped to an illegal value when
// specified.
TEST(IRInstructionMapper, MemMoveIllegal) {
StringRef ModuleString = R"(
declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1594,6 +1599,7 @@ TEST(IRInstructionMapper, MemMoveIllegal) {
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
Mapper.InstClassifier.EnableIntrinsics = false;
getVectors(*M, Mapper, InstrList, UnsignedVec);
ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1601,6 +1607,45 @@ TEST(IRInstructionMapper, MemMoveIllegal) {
ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
}
// Checks that mem* instructions are mapped to a legal value when not
// specified, and that all the intrinsics are marked differently.
TEST(IRInstructionMapper, MemOpsLegal) {
StringRef ModuleString = R"(
declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
define i64 @function(i64 %x, i64 %z, i64 %n) {
entry:
%pool = alloca [59 x i64], align 4
%tmp = bitcast [59 x i64]* %pool to i8*
call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
%cmp3 = icmp eq i64 %n, 0
%a = add i64 %x, %z
%c = add i64 %x, %z
ret i64 0
})";
LLVMContext Context;
std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> UnsignedVec;
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
Mapper.InstClassifier.EnableIntrinsics = true;
getVectors(*M, Mapper, InstrList, UnsignedVec);
ASSERT_EQ(InstrList.size(), UnsignedVec.size());
ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(9));
ASSERT_LT(UnsignedVec[2], UnsignedVec[3]);
ASSERT_LT(UnsignedVec[3], UnsignedVec[4]);
ASSERT_LT(UnsignedVec[4], UnsignedVec[5]);
}
// Checks that variable argument instructions are mapped to an illegal value.
// We exclude variable argument instructions since variable arguments
// require extra checking of the argument list.
@@ -1642,6 +1687,7 @@ TEST(IRInstructionMapper, VarArgsIllegal) {
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
Mapper.InstClassifier.EnableIntrinsics = false;
getVectors(*M, Mapper, InstrList, UnsignedVec);
ASSERT_EQ(InstrList.size(), UnsignedVec.size());