forked from OSchip/llvm-project
Teach instcombine to fold compares of loads from constant
arrays with variable indices into a comparison of the index with a constant. The most common occurrence of this that I see by far is stuff like: if ("foobar"[i] == '\0') ... which we compile into: if (i == 6), saving a load and materialization of the global address. This also exposes loop trip count information to later passes in many cases. This triggers hundreds of times in xalancbmk, which is where I first noticed it, but it also triggers in many other apps. Here are a few interesting ones from various apps: @must_be_connected_without = internal constant [8 x i8*] [i8* getelementptr inbounds ([3 x i8]* @.str64320, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str27283, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str71327, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str72328, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str18274, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8]* @.str11267, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8]* @.str32288, i64 0, i64 0), i8* null], align 32 ; <[8 x i8*]*> [#uses=2] %scevgep.i = getelementptr [8 x i8*]* @must_be_connected_without, i64 0, i64 %indvar.i ; <i8**> [#uses=1] %17 = load ... %18 = icmp eq i8* %17, null ; <i1> [#uses=1] -> icmp eq i64 %indvar.i, 7 @yytable1095 = internal constant [84 x i8] c"\12\01(\05\06\07\08\09\0A\0B\0C\0D\0E1\0F\10\11266\1D: \10\11,-,0\03'\10\11B6\04\17&\18\1945\05\06\07\08\09\0A\0B\0C\0D\0E\1E\0F\10\11*\1A\1B\1C$3+>#%;<IJ=ADFEGH9KL\00\00\00C", align 32 ; <[84 x i8]*> [#uses=2] %57 = getelementptr inbounds [84 x i8]* @yytable1095, i64 0, i64 %56 ; <i8*> [#uses=1] %mode.0.in = getelementptr inbounds [9 x i32]* @mb_mode_table, i64 0, i64 %.pn ; <i32*> [#uses=1] load ... %64 = icmp eq i8 %58, 4 ; <i1> [#uses=1] -> icmp eq i64 %.pn, 35 ; <i1> [#uses=0] @gsm_DLB = internal constant [4 x i16] [i16 6554, i16 16384, i16 26214, i16 32767] %scevgep.i = getelementptr [4 x i16]* @gsm_DLB, i64 0, i64 %indvar.i ; <i16*> [#uses=1] %425 = load %scevgep.i %426 = icmp eq i16 %425, -32768 ; <i1> [#uses=0] -> false llvm-svn: 92411
This commit is contained in:
parent
8fb74c6ee2
commit
a061859ccc
|
@ -258,6 +258,8 @@ namespace {
|
|||
Instruction *commonShiftTransforms(BinaryOperator &I);
|
||||
Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
|
||||
Constant *RHSC);
|
||||
Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
|
||||
GlobalVariable *GV, CmpInst &ICI);
|
||||
Instruction *visitFCmpInst(FCmpInst &I);
|
||||
Instruction *visitICmpInst(ICmpInst &I);
|
||||
Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
|
||||
|
@ -6015,6 +6017,112 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
|
|||
return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
|
||||
}
|
||||
|
||||
|
||||
/// FoldCmpLoadFromIndexedGlobal - Called we see this pattern:
|
||||
/// cmp pred (load (gep GV, ...)), cmpcst
|
||||
/// where GV is a global variable with a constant initializer. Try to simplify
|
||||
/// this into one or two simpler comparisons that do not need the load. For
|
||||
/// example, we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into
|
||||
/// "icmp eq i, 3". We assume that eliminating a load is always goodness.
|
||||
Instruction *InstCombiner::
|
||||
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
||||
CmpInst &ICI) {
|
||||
|
||||
// There are many forms of this optimization we can handle, for now, just do
|
||||
// the simple index into a single-dimensional array.
|
||||
//
|
||||
// Require: GEP GV, 0, i
|
||||
if (GEP->getNumOperands() != 3 ||
|
||||
!isa<ConstantInt>(GEP->getOperand(1)) ||
|
||||
!cast<ConstantInt>(GEP->getOperand(1))->isZero())
|
||||
return 0;
|
||||
|
||||
ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
|
||||
if (Init == 0 || Init->getNumOperands() > 1024) return 0;
|
||||
|
||||
|
||||
// Variables for our state machines.
|
||||
|
||||
// OnlyTrueElement - Used to emit a comparison of "i == 47", where 47 is the
|
||||
// only index the condition is true for. The values are -1 -> undef,
|
||||
// -2 -> overdef, >= 0 -> that index is true.
|
||||
int OnlyTrueElement = -1;
|
||||
|
||||
// OnlyFalseElement - Used to emit a comparison of "i != 47", where 47 is the
|
||||
// only index the condition is false for. The values are -1 -> undef,
|
||||
// -2 -> overdef, >= 0 -> that index is false.
|
||||
int OnlyFalseElement = -1;
|
||||
|
||||
// Scan the array and see if one of our patterns matches.
|
||||
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
|
||||
for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
|
||||
// Find out if the comparison would be true or false for the i'th element.
|
||||
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(),
|
||||
Init->getOperand(i),
|
||||
CompareRHS, TD);
|
||||
// If the result is undef for this element, ignore it.
|
||||
if (isa<UndefValue>(C)) continue;
|
||||
|
||||
// If we can't compute the result for any of the elements, we have to give
|
||||
// up evaluating the entire conditional.
|
||||
if (!isa<ConstantInt>(C)) return 0;
|
||||
|
||||
// Otherwise, we know if the comparison is true or false for this element,
|
||||
// update our state machines.
|
||||
bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
|
||||
|
||||
// State machine for single index comparison.
|
||||
if (IsTrueForElt) {
|
||||
// If undefined -> defined. Otherwise -> overdefined.
|
||||
OnlyTrueElement = OnlyTrueElement == -1 ? i : -2;
|
||||
} else {
|
||||
// If undefined -> defined. Otherwise -> overdefined.
|
||||
OnlyFalseElement = OnlyFalseElement == -1 ? i : -2;
|
||||
}
|
||||
|
||||
// If all of our states become overdefined, bail out early.
|
||||
if (OnlyTrueElement == -2 && OnlyFalseElement == -2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Now that we've scanned the entire array, emit our new comparison(s). We
|
||||
// order the state machines in complexity of the generated code.
|
||||
if (OnlyTrueElement != -2) {
|
||||
// None true -> false.
|
||||
if (OnlyTrueElement == -1)
|
||||
return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));
|
||||
|
||||
// True for one element -> 'i == 47'.
|
||||
return new ICmpInst(ICmpInst::ICMP_EQ, GEP->getOperand(2),
|
||||
ConstantInt::get(GEP->getOperand(2)->getType(),
|
||||
OnlyTrueElement));
|
||||
}
|
||||
|
||||
if (OnlyFalseElement != -2) {
|
||||
// None false -> true.
|
||||
if (OnlyFalseElement == -1)
|
||||
return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));
|
||||
|
||||
return new ICmpInst(ICmpInst::ICMP_NE, GEP->getOperand(2),
|
||||
ConstantInt::get(GEP->getOperand(2)->getType(),
|
||||
OnlyFalseElement));
|
||||
}
|
||||
|
||||
assert(0 && "Should have bailed out early");
|
||||
|
||||
// TODO: FCMP.
|
||||
|
||||
// TODO: Range check.
|
||||
|
||||
// TODO: If the global array has 32 (or 64 if native!) or less entries, we
|
||||
// can turn this into something like:
|
||||
// ((magicbitconstant >> i) & 1) != 0)
|
||||
// where we populate magicbitconstant with 0101010 based on the comparison
|
||||
// results.
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
|
||||
bool Changed = false;
|
||||
|
||||
|
@ -6175,7 +6283,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
|
|||
Value *A = 0, *B = 0;
|
||||
|
||||
// (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
|
||||
if (I.isEquality() && CI->isNullValue() &&
|
||||
if (I.isEquality() && CI->isZero() &&
|
||||
match(Op0, m_Sub(m_Value(A), m_Value(B)))) {
|
||||
// (icmp cond A B) if cond is equality
|
||||
return new ICmpInst(I.getPredicate(), A, B);
|
||||
|
@ -6475,6 +6583,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
|
|||
return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
|
||||
Constant::getNullValue(LHSI->getOperand(0)->getType()));
|
||||
break;
|
||||
|
||||
case Instruction::Load:
|
||||
if (GetElementPtrInst *GEP =
|
||||
dyn_cast<GetElementPtrInst>(LHSI->getOperand(0)))
|
||||
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
|
||||
if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
|
||||
!cast<LoadInst>(LHSI)->isVolatile()) {
|
||||
if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
|
||||
return Res;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
; RUN: opt < %s -instcombine -S | FileCheck %s
|
||||
|
||||
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
|
||||
i16 73, i16 82, i16 69, i16 68, i16 0]
|
||||
|
||||
define i1 @test1(i32 %X) {
|
||||
%P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
|
||||
%Q = load i16* %P
|
||||
%R = icmp eq i16 %Q, 0
|
||||
ret i1 %R
|
||||
; CHECK: @test1
|
||||
; CHECK-NEXT: %R = icmp eq i32 %X, 9
|
||||
; CHECK-NEXT: ret i1 %R
|
||||
}
|
||||
|
||||
define i1 @test2(i32 %X) {
|
||||
%P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
|
||||
%Q = load i16* %P
|
||||
%R = icmp slt i16 %Q, 85
|
||||
ret i1 %R
|
||||
; CHECK: @test2
|
||||
; CHECK-NEXT: %R = icmp ne i32 %X, 4
|
||||
; CHECK-NEXT: ret i1 %R
|
||||
}
|
||||
|
Loading…
Reference in New Issue