When inlining a function and adding its inner call sites to the
candidate set for subsequent inlining, try to simplify the arguments to
the inner call site now that inlining has been performed.

The goal here is to propagate and fold constants through deeply nested
call chains. Without doing this, we lose the inliner bonus that should
be applied because the arguments don't match the exact pattern the cost
estimator uses.
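
As a rough illustration (not part of this patch, and the function names here
are made up), the kind of chain this targets looks like:

  // Hypothetical C++ sketch: once 'middle' is inlined into 'user', the
  // argument of the inner call becomes the constant 40. Simplifying the
  // new call site lets the cost estimator see a constant argument for
  // 'leaf' and apply its bonus when deciding whether to inline it too.
  static int leaf(int x) {
    if (x < 42)            // folds away once x is a known constant
      return x * 2;
    return x;
  }
  static int middle(int x) {
    return leaf(42 - x);   // with x == 2 this becomes leaf(40)
  }
  int user() {
    return middle(2);
  }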

Reviewed on IRC by Benjamin Kramer.

llvm-svn: 152556
Chandler Carruth 2012-03-12 11:19:33 +00:00
parent a0796555e2
commit 595fda8466
2 changed files with 110 additions and 1 deletion

lib/Transforms/IPO/Inliner.cpp

@@ -19,6 +19,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -327,6 +328,37 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
  return false;
}

/// \brief Simplify arguments going into a particular callsite.
///
/// This is important to do each time we add a callsite due to inlining so that
/// constants and other entities which feed into inline cost estimation are
/// properly recognized when analyzing the new callsite. Consider:
///   void outer(int x) {
///     if (x < 42)
///       return inner(42 - x);
///     ...
///   }
///   void inner(int x) {
///     ...
///   }
///
/// The inliner gives calls to 'outer' with a constant argument a bonus because
/// it will delete one side of a branch. But the resulting call to 'inner'
/// will, after inlining, also have a constant operand. We need to do just
/// enough constant folding to expose this for callsite arguments. The rest
/// will be taken care of after the inliner finishes running.
static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
  // invalidate operand iterators in any cases.
  SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I)
    if (Instruction *Inst = dyn_cast<Instruction>(*I))
      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
}

bool Inliner::runOnSCC(CallGraphSCC &SCC) {
  CallGraph &CG = getAnalysis<CallGraph>();
@@ -455,7 +487,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
        for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
             i != e; ++i) {
          Value *Ptr = InlineInfo.InlinedCalls[i];
-         CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+         CallSite NewCS = Ptr;
+         simplifyCallSiteArguments(TD, NewCS);
+         CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
        }
      }

test/Transforms/Inline/inline_constprop.ll

@@ -12,3 +12,78 @@ define i32 @caller1() {
%X = call i32 @callee1( i32 10, i32 3 )
ret i32 %X
}
define i32 @caller2() {
; CHECK: @caller2
; CHECK-NOT: call void @callee2
; CHECK: ret
; We contrive to make this hard for *just* the inline pass to do in order to
; simulate what can actually happen with large, complex functions getting
; inlined.
%a = add i32 42, 0
%b = add i32 48, 0
%x = call i32 @callee21(i32 %a, i32 %b)
ret i32 %x
}
define i32 @callee21(i32 %x, i32 %y) {
%sub = sub i32 %y, %x
%result = call i32 @callee22(i32 %sub)
ret i32 %result
}
declare i8* @getptr()
define i32 @callee22(i32 %x) {
%icmp = icmp ugt i32 %x, 42
br i1 %icmp, label %bb.true, label %bb.false
bb.true:
; This block mustn't be counted in the inline cost.
%ptr = call i8* @getptr()
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
load volatile i8* %ptr
ret i32 %x
bb.false:
ret i32 %x
}