forked from OSchip/llvm-project
[FuncSpec] Support function specialization across multiple arguments.
The current implementation of Function Specialization does not allow specializing more than one argument per function call, which is a limitation I am lifting with this patch. My main challenge was to choose the most suitable ADT for storing the specializations. We need an associative container for binding all the actual arguments of a specialization to the function call. We also need a consistent iteration order across executions. Lastly, we want to be able to sort the entries by Gain and reject the least profitable ones. MapVector fits the bill, but not quite: erasing elements is expensive and using stable_sort messes up the indices to the underlying vector. I am therefore using the underlying vector directly after calculating the Gain. Differential Revision: https://reviews.llvm.org/D119880
This commit is contained in:
parent
4ca111d4cb
commit
8045bf9d0d
|
@ -151,13 +151,14 @@ public:
|
|||
/// Return a reference to the set of argument tracked functions.
|
||||
SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
|
||||
|
||||
/// Mark the constant argument of a new function specialization. \p F points
|
||||
/// to the cloned function and \p Arg represents the constant argument as a
|
||||
/// pair of {formal,actual} values (the formal argument is associated with the
|
||||
/// original function definition). All other arguments of the specialization
|
||||
/// inherit the lattice state of their corresponding values in the original
|
||||
/// function.
|
||||
void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
|
||||
/// Mark the constant arguments of a new function specialization. \p F points
|
||||
/// to the cloned function and \p Args contains a list of constant arguments
|
||||
/// represented as pairs of {formal,actual} values (the formal argument is
|
||||
/// associated with the original function definition). All other arguments of
|
||||
/// the specialization inherit the lattice state of their corresponding values
|
||||
/// in the original function.
|
||||
void markArgInFuncSpecialization(Function *F,
|
||||
const SmallVectorImpl<ArgInfo> &Args);
|
||||
|
||||
/// Mark all of the blocks in function \p F non-executable. Clients can use
|
||||
/// this method to erase a function from the module (e.g., if it has been
|
||||
|
|
|
@ -99,8 +99,13 @@ static cl::opt<bool> SpecializeOnAddresses(
|
|||
"func-specialization-on-address", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable function specialization on the address of global values"));
|
||||
|
||||
// TODO: This needs checking to see the impact on compile-times, which is why
|
||||
// this is off by default for now.
|
||||
// Disabled by default as it can significantly increase compilation times.
|
||||
// Running nikic's compile time tracker on x86 with instruction count as the
|
||||
// metric shows 3-4% regression for SPASS while being neutral for all other
|
||||
// benchmarks of the llvm test suite.
|
||||
//
|
||||
// https://llvm-compile-time-tracker.com
|
||||
// https://github.com/nikic/llvm-compile-time-tracker
|
||||
static cl::opt<bool> EnableSpecializationForLiteralConstant(
|
||||
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
|
||||
cl::desc("Enable specialization of functions that take a literal constant "
|
||||
|
@ -110,17 +115,17 @@ namespace {
|
|||
// Bookkeeping struct to pass data from the analysis and profitability phase
|
||||
// to the actual transform helper functions.
|
||||
struct SpecializationInfo {
|
||||
ArgInfo Arg; // Stores the {formal,actual} argument pair.
|
||||
InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
|
||||
|
||||
SpecializationInfo(Argument *A, Constant *C, InstructionCost G)
|
||||
: Arg(A, C), Gain(G){};
|
||||
SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
|
||||
InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
using FuncList = SmallVectorImpl<Function *>;
|
||||
using ConstList = SmallVector<Constant *>;
|
||||
using SpecializationList = SmallVector<SpecializationInfo>;
|
||||
using CallArgBinding = std::pair<CallBase *, Constant *>;
|
||||
using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
|
||||
// We are using MapVector because it guarantees deterministic iteration
|
||||
// order across executions.
|
||||
using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
|
||||
|
||||
// Helper to check if \p LV is either a constant or a constant
|
||||
// range with a single element. This should cover exactly the same cases as the
|
||||
|
@ -307,17 +312,15 @@ public:
|
|||
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
|
||||
<< F->getName() << " is " << Cost << "\n");
|
||||
|
||||
SpecializationList Specializations;
|
||||
calculateGains(F, Cost, Specializations);
|
||||
if (Specializations.empty()) {
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
|
||||
SmallVector<CallSpecBinding, 8> Specializations;
|
||||
if (!calculateGains(F, Cost, Specializations)) {
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
for (SpecializationInfo &S : Specializations) {
|
||||
specializeFunction(F, S, WorkList);
|
||||
Changed = true;
|
||||
}
|
||||
Changed = true;
|
||||
for (auto &Entry : Specializations)
|
||||
specializeFunction(F, Entry.second, WorkList);
|
||||
}
|
||||
|
||||
updateSpecializedFuncs(Candidates, WorkList);
|
||||
|
@ -392,21 +395,22 @@ private:
|
|||
return Clone;
|
||||
}
|
||||
|
||||
/// This function decides whether it's worthwhile to specialize function \p F
|
||||
/// based on the known constant values its arguments can take on, i.e. it
|
||||
/// calculates a gain and returns a list of actual arguments that are deemed
|
||||
/// profitable to specialize. Specialization is performed on the first
|
||||
/// interesting argument. Specializations based on additional arguments will
|
||||
/// be evaluated on following iterations of the main IPSCCP solve loop.
|
||||
void calculateGains(Function *F, InstructionCost Cost,
|
||||
SpecializationList &WorkList) {
|
||||
/// This function decides whether it's worthwhile to specialize function
|
||||
/// \p F based on the known constant values its arguments can take on. It
|
||||
/// only discovers potential specialization opportunities without actually
|
||||
/// applying them.
|
||||
///
|
||||
/// \returns true if any specializations have been found.
|
||||
bool calculateGains(Function *F, InstructionCost Cost,
|
||||
SmallVectorImpl<CallSpecBinding> &WorkList) {
|
||||
SpecializationMap Specializations;
|
||||
// Determine if we should specialize the function based on the values the
|
||||
// argument can take on. If specialization is not profitable, we continue
|
||||
// on to the next argument.
|
||||
for (Argument &FormalArg : F->args()) {
|
||||
// Determine if this argument is interesting. If we know the argument can
|
||||
// take on any constant values, they are collected in Constants.
|
||||
ConstList ActualArgs;
|
||||
SmallVector<CallArgBinding, 8> ActualArgs;
|
||||
if (!isArgumentInteresting(&FormalArg, ActualArgs)) {
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Argument "
|
||||
<< FormalArg.getNameOrAsOperand()
|
||||
|
@ -414,50 +418,56 @@ private:
|
|||
continue;
|
||||
}
|
||||
|
||||
for (auto *ActualArg : ActualArgs) {
|
||||
InstructionCost Gain =
|
||||
ForceFunctionSpecialization
|
||||
? 1
|
||||
: getSpecializationBonus(&FormalArg, ActualArg) - Cost;
|
||||
for (const auto &Entry : ActualArgs) {
|
||||
CallBase *Call = Entry.first;
|
||||
Constant *ActualArg = Entry.second;
|
||||
|
||||
if (Gain <= 0)
|
||||
continue;
|
||||
WorkList.push_back({&FormalArg, ActualArg, Gain});
|
||||
auto I = Specializations.insert({Call, SpecializationInfo()});
|
||||
SpecializationInfo &S = I.first->second;
|
||||
|
||||
if (I.second)
|
||||
S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost;
|
||||
if (!ForceFunctionSpecialization)
|
||||
S.Gain += getSpecializationBonus(&FormalArg, ActualArg);
|
||||
S.Args.push_back({&FormalArg, ActualArg});
|
||||
}
|
||||
|
||||
if (WorkList.empty())
|
||||
continue;
|
||||
|
||||
// Sort the candidates in descending order.
|
||||
llvm::stable_sort(WorkList, [](const SpecializationInfo &L,
|
||||
const SpecializationInfo &R) {
|
||||
return L.Gain > R.Gain;
|
||||
});
|
||||
|
||||
// Truncate the worklist to 'MaxClonesThreshold' candidates if
|
||||
// necessary.
|
||||
if (WorkList.size() > MaxClonesThreshold) {
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
|
||||
<< "the maximum number of clones threshold.\n"
|
||||
<< "FnSpecialization: Truncating worklist to "
|
||||
<< MaxClonesThreshold << " candidates.\n");
|
||||
WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
|
||||
<< F->getName() << "\n";
|
||||
for (SpecializationInfo &S
|
||||
: WorkList) {
|
||||
dbgs() << "FnSpecialization: FormalArg = "
|
||||
<< S.Arg.Formal->getNameOrAsOperand()
|
||||
<< ", ActualArg = "
|
||||
<< S.Arg.Actual->getNameOrAsOperand()
|
||||
<< ", Gain = " << S.Gain << "\n";
|
||||
});
|
||||
|
||||
// FIXME: Only one argument per function.
|
||||
break;
|
||||
}
|
||||
|
||||
// Remove unprofitable specializations.
|
||||
Specializations.remove_if(
|
||||
[](const auto &Entry) { return Entry.second.Gain <= 0; });
|
||||
|
||||
// Clear the MapVector and return the underlying vector.
|
||||
WorkList = Specializations.takeVector();
|
||||
|
||||
// Sort the candidates in descending order.
|
||||
llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
|
||||
return L.second.Gain > R.second.Gain;
|
||||
});
|
||||
|
||||
// Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
|
||||
if (WorkList.size() > MaxClonesThreshold) {
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
|
||||
<< "the maximum number of clones threshold.\n"
|
||||
<< "FnSpecialization: Truncating worklist to "
|
||||
<< MaxClonesThreshold << " candidates.\n");
|
||||
WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
|
||||
<< F->getName() << "\n";
|
||||
for (const auto &Entry
|
||||
: WorkList) {
|
||||
dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain
|
||||
<< "\n";
|
||||
for (const ArgInfo &Arg : Entry.second.Args)
|
||||
dbgs() << "FnSpecialization: FormalArg = "
|
||||
<< Arg.Formal->getNameOrAsOperand()
|
||||
<< ", ActualArg = "
|
||||
<< Arg.Actual->getNameOrAsOperand() << "\n";
|
||||
});
|
||||
|
||||
return !WorkList.empty();
|
||||
}
|
||||
|
||||
bool isCandidateFunction(Function *F) {
|
||||
|
@ -490,12 +500,12 @@ private:
|
|||
Function *Clone = cloneCandidateFunction(F, Mappings);
|
||||
|
||||
// Rewrite calls to the function so that they call the clone instead.
|
||||
rewriteCallSites(Clone, S.Arg, Mappings);
|
||||
rewriteCallSites(Clone, S.Args, Mappings);
|
||||
|
||||
// Initialize the lattice state of the arguments of the function clone,
|
||||
// marking the argument on which we specialized the function constant
|
||||
// with the given value.
|
||||
Solver.markArgInFuncSpecialization(Clone, S.Arg);
|
||||
Solver.markArgInFuncSpecialization(Clone, S.Args);
|
||||
|
||||
// Mark all the specialized functions
|
||||
WorkList.push_back(Clone);
|
||||
|
@ -641,7 +651,8 @@ private:
|
|||
///
|
||||
/// \returns true if the function should be specialized on the given
|
||||
/// argument.
|
||||
bool isArgumentInteresting(Argument *A, ConstList &Constants) {
|
||||
bool isArgumentInteresting(Argument *A,
|
||||
SmallVectorImpl<CallArgBinding> &Constants) {
|
||||
// For now, don't attempt to specialize functions based on the values of
|
||||
// composite types.
|
||||
if (!A->getType()->isSingleValueType() || A->user_empty())
|
||||
|
@ -681,7 +692,8 @@ private:
|
|||
|
||||
/// Collect in \p Constants all the constant values that argument \p A can
|
||||
/// take on.
|
||||
void getPossibleConstants(Argument *A, ConstList &Constants) {
|
||||
void getPossibleConstants(Argument *A,
|
||||
SmallVectorImpl<CallArgBinding> &Constants) {
|
||||
Function *F = A->getParent();
|
||||
|
||||
// Iterate over all the call sites of the argument's parent function.
|
||||
|
@ -723,23 +735,24 @@ private:
|
|||
|
||||
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
|
||||
EnableSpecializationForLiteralConstant))
|
||||
Constants.push_back(cast<Constant>(V));
|
||||
Constants.push_back({&CS, cast<Constant>(V)});
|
||||
}
|
||||
}
|
||||
|
||||
/// Rewrite calls to function \p F to call function \p Clone instead.
|
||||
///
|
||||
/// This function modifies calls to function \p F as long as the actual
|
||||
/// argument matches the one in \p Arg. Note that for recursive calls we
|
||||
/// need to compare against the cloned formal argument.
|
||||
/// arguments match those in \p Args. Note that for recursive calls we
|
||||
/// need to compare against the cloned formal arguments.
|
||||
///
|
||||
/// Callsites that have been marked with the MinSize function attribute won't
|
||||
/// be specialized and rewritten.
|
||||
void rewriteCallSites(Function *Clone, const ArgInfo &Arg,
|
||||
void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
|
||||
ValueToValueMapTy &Mappings) {
|
||||
Function *F = Arg.Formal->getParent();
|
||||
unsigned ArgNo = Arg.Formal->getArgNo();
|
||||
SmallVector<CallBase *, 4> CallSitesToRewrite;
|
||||
assert(!Args.empty() && "Specialization without arguments");
|
||||
Function *F = Args[0].Formal->getParent();
|
||||
|
||||
SmallVector<CallBase *, 8> CallSitesToRewrite;
|
||||
for (auto *U : F->users()) {
|
||||
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
|
||||
continue;
|
||||
|
@ -758,9 +771,16 @@ private:
|
|||
<< "\n");
|
||||
if (/* recursive call */
|
||||
(CS->getFunction() == Clone &&
|
||||
CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]) ||
|
||||
all_of(Args,
|
||||
[CS, &Mappings](const ArgInfo &Arg) {
|
||||
unsigned ArgNo = Arg.Formal->getArgNo();
|
||||
return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal];
|
||||
})) ||
|
||||
/* normal call */
|
||||
CS->getArgOperand(ArgNo) == Arg.Actual) {
|
||||
all_of(Args, [CS](const ArgInfo &Arg) {
|
||||
unsigned ArgNo = Arg.Formal->getArgNo();
|
||||
return CS->getArgOperand(ArgNo) == Arg.Actual;
|
||||
})) {
|
||||
CS->setCalledFunction(Clone);
|
||||
Solver.markOverdefined(CS);
|
||||
}
|
||||
|
@ -891,7 +911,7 @@ bool llvm::runFunctionSpecialization(
|
|||
// Initially resolve the constants in all the argument tracked functions.
|
||||
RunSCCPSolver(FuncDecls);
|
||||
|
||||
SmallVector<Function *, 2> WorkList;
|
||||
SmallVector<Function *, 8> WorkList;
|
||||
unsigned I = 0;
|
||||
while (FuncSpecializationMaxIters != I++ &&
|
||||
FS.specializeFunctions(FuncDecls, WorkList)) {
|
||||
|
|
|
@ -450,7 +450,8 @@ public:
|
|||
return TrackingIncomingArguments;
|
||||
}
|
||||
|
||||
void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
|
||||
void markArgInFuncSpecialization(Function *F,
|
||||
const SmallVectorImpl<ArgInfo> &Args);
|
||||
|
||||
void markFunctionUnreachable(Function *F) {
|
||||
for (auto &BB : *F)
|
||||
|
@ -524,21 +525,24 @@ Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void SCCPInstVisitor::markArgInFuncSpecialization(Function *F,
|
||||
const ArgInfo &Arg) {
|
||||
assert(F->arg_size() == Arg.Formal->getParent()->arg_size() &&
|
||||
void SCCPInstVisitor::markArgInFuncSpecialization(
|
||||
Function *F, const SmallVectorImpl<ArgInfo> &Args) {
|
||||
assert(!Args.empty() && "Specialization without arguments");
|
||||
assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() &&
|
||||
"Functions should have the same number of arguments");
|
||||
|
||||
auto Iter = Args.begin();
|
||||
Argument *NewArg = F->arg_begin();
|
||||
Argument *OldArg = Arg.Formal->getParent()->arg_begin();
|
||||
Argument *OldArg = Args[0].Formal->getParent()->arg_begin();
|
||||
for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) {
|
||||
|
||||
LLVM_DEBUG(dbgs() << "SCCP: Marking argument "
|
||||
<< NewArg->getNameOrAsOperand() << "\n");
|
||||
|
||||
if (OldArg == Arg.Formal) {
|
||||
if (OldArg == Iter->Formal) {
|
||||
// Mark the argument constants in the new function.
|
||||
markConstant(NewArg, Arg.Actual);
|
||||
markConstant(NewArg, Iter->Actual);
|
||||
++Iter;
|
||||
} else if (ValueState.count(OldArg)) {
|
||||
// For the remaining arguments in the new function, copy the lattice state
|
||||
// over from the old function.
|
||||
|
@ -1717,8 +1721,9 @@ SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() {
|
|||
return Visitor->getArgumentTrackedFunctions();
|
||||
}
|
||||
|
||||
void SCCPSolver::markArgInFuncSpecialization(Function *F, const ArgInfo &Arg) {
|
||||
Visitor->markArgInFuncSpecialization(F, Arg);
|
||||
void SCCPSolver::markArgInFuncSpecialization(
|
||||
Function *F, const SmallVectorImpl<ArgInfo> &Args) {
|
||||
Visitor->markArgInFuncSpecialization(F, Args);
|
||||
}
|
||||
|
||||
void SCCPSolver::markFunctionUnreachable(Function *F) {
|
||||
|
|
|
@ -46,7 +46,7 @@ entry:
|
|||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: %0 = load i32, i32* @A, align 4
|
||||
; CHECK-NEXT: %add = add nsw i32 %x, %0
|
||||
; CHECK-NEXT: %1 = load i32, i32* %c, align 4
|
||||
; CHECK-NEXT: %1 = load i32, i32* @C, align 4
|
||||
; CHECK-NEXT: %add1 = add nsw i32 %add, %1
|
||||
; CHECK-NEXT: ret i32 %add1
|
||||
; CHECK-NEXT: }
|
||||
|
@ -55,7 +55,7 @@ entry:
|
|||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: %0 = load i32, i32* @B, align 4
|
||||
; CHECK-NEXT: %add = add nsw i32 %x, %0
|
||||
; CHECK-NEXT: %1 = load i32, i32* %c, align 4
|
||||
; CHECK-NEXT: %1 = load i32, i32* @D, align 4
|
||||
; CHECK-NEXT: %add1 = add nsw i32 %add, %1
|
||||
; CHECK-NEXT: ret i32 %add1
|
||||
; CHECK-NEXT: }
|
||||
|
|
|
@ -0,0 +1,185 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -function-specialization -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE
|
||||
; RUN: opt -function-specialization -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=ONE
|
||||
; RUN: opt -function-specialization -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO
|
||||
; RUN: opt -function-specialization -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE
|
||||
|
||||
; Make sure that we iterate correctly after sorting the specializations:
|
||||
; FnSpecialization: Specializations for function compute
|
||||
; FnSpecialization: Gain = 608
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = power
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = mul
|
||||
; FnSpecialization: Gain = 982
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = plus
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = minus
|
||||
; FnSpecialization: Gain = 795
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = minus
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = power
|
||||
|
||||
define i64 @main(i64 %x, i64 %y, i1 %flag) {
|
||||
; NONE-LABEL: @main(
|
||||
; NONE-NEXT: entry:
|
||||
; NONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; NONE: plus:
|
||||
; NONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
|
||||
; NONE-NEXT: br label [[MERGE:%.*]]
|
||||
; NONE: minus:
|
||||
; NONE-NEXT: [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
|
||||
; NONE-NEXT: br label [[MERGE]]
|
||||
; NONE: merge:
|
||||
; NONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
|
||||
; NONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
|
||||
; NONE-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
; ONE-LABEL: @main(
|
||||
; ONE-NEXT: entry:
|
||||
; ONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; ONE: plus:
|
||||
; ONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
|
||||
; ONE-NEXT: br label [[MERGE:%.*]]
|
||||
; ONE: minus:
|
||||
; ONE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
|
||||
; ONE-NEXT: br label [[MERGE]]
|
||||
; ONE: merge:
|
||||
; ONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
|
||||
; ONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
|
||||
; ONE-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
; TWO-LABEL: @main(
|
||||
; TWO-NEXT: entry:
|
||||
; TWO-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; TWO: plus:
|
||||
; TWO-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
|
||||
; TWO-NEXT: br label [[MERGE:%.*]]
|
||||
; TWO: minus:
|
||||
; TWO-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
|
||||
; TWO-NEXT: br label [[MERGE]]
|
||||
; TWO: merge:
|
||||
; TWO-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
|
||||
; TWO-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
|
||||
; TWO-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
; THREE-LABEL: @main(
|
||||
; THREE-NEXT: entry:
|
||||
; THREE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; THREE: plus:
|
||||
; THREE-NEXT: [[TMP0:%.*]] = call i64 @compute.3(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
|
||||
; THREE-NEXT: br label [[MERGE:%.*]]
|
||||
; THREE: minus:
|
||||
; THREE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
|
||||
; THREE-NEXT: br label [[MERGE]]
|
||||
; THREE: merge:
|
||||
; THREE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
|
||||
; THREE-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
|
||||
; THREE-NEXT: ret i64 [[TMP3]]
|
||||
;
|
||||
entry:
|
||||
br i1 %flag, label %plus, label %minus
|
||||
|
||||
plus:
|
||||
%tmp0 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
|
||||
br label %merge
|
||||
|
||||
minus:
|
||||
%tmp1 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
|
||||
br label %merge
|
||||
|
||||
merge:
|
||||
%tmp2 = phi i64 [ %tmp0, %plus ], [ %tmp1, %minus]
|
||||
%tmp3 = call i64 @compute(i64 %tmp2, i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
|
||||
ret i64 %tmp3
|
||||
}
|
||||
|
||||
; THREE-NOT: define internal i64 @compute
|
||||
;
|
||||
; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
|
||||
; THREE-NEXT: entry:
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
|
||||
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
|
||||
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
|
||||
; THREE-NEXT: ret i64 [[TMP5]]
|
||||
; THREE-NEXT: }
|
||||
;
|
||||
; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
|
||||
; THREE-NEXT: entry:
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
|
||||
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
|
||||
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
|
||||
; THREE-NEXT: ret i64 [[TMP5]]
|
||||
; THREE-NEXT: }
|
||||
;
|
||||
; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
|
||||
; THREE-NEXT: entry:
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
|
||||
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
|
||||
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
|
||||
; THREE-NEXT: ret i64 [[TMP5]]
|
||||
; THREE-NEXT: }
|
||||
;
|
||||
define internal i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
|
||||
entry:
|
||||
%tmp0 = call i64 %binop1(i64 %x, i64 %y)
|
||||
%tmp1 = call i64 %binop2(i64 %x, i64 %y)
|
||||
%add = add i64 %tmp0, %tmp1
|
||||
%div = sdiv i64 %add, %x
|
||||
%sub = sub i64 %div, %y
|
||||
%mul = mul i64 %sub, 2
|
||||
ret i64 %mul
|
||||
}
|
||||
|
||||
define internal i64 @plus(i64 %x, i64 %y) {
|
||||
entry:
|
||||
%tmp0 = add i64 %x, %y
|
||||
ret i64 %tmp0
|
||||
}
|
||||
|
||||
define internal i64 @minus(i64 %x, i64 %y) {
|
||||
entry:
|
||||
%tmp0 = sub i64 %x, %y
|
||||
ret i64 %tmp0
|
||||
}
|
||||
|
||||
define internal i64 @mul(i64 %x, i64 %n) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i64 %n, 1
|
||||
br i1 %cmp6, label %for.body, label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body, %entry
|
||||
%x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %add, %for.body ]
|
||||
ret i64 %x.addr.0.lcssa
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
|
||||
%x.addr.07 = phi i64 [ %add, %for.body ], [ %x, %entry ]
|
||||
%add = shl nsw i64 %x.addr.07, 1
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
define internal i64 @power(i64 %x, i64 %n) {
|
||||
entry:
|
||||
%cmp6 = icmp sgt i64 %n, 1
|
||||
br i1 %cmp6, label %for.body, label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup: ; preds = %for.body, %entry
|
||||
%x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %mul, %for.body ]
|
||||
ret i64 %x.addr.0.lcssa
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
|
||||
%x.addr.07 = phi i64 [ %mul, %for.body ], [ %x, %entry ]
|
||||
%mul = mul nsw i64 %x.addr.07, %x.addr.07
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n
|
||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
||||
}
|
Loading…
Reference in New Issue