forked from OSchip/llvm-project
Teach the table lookup optimization to generate range compares
when a consecutive sequence of elements all satisfy the predicate. Like the double compare case, this generates better code than the magic constant case and generalizes to more than 32/64 element array lookups. Here are some examples where it triggers. From 403.gcc, most accesses to the rtx_class array are handled, e.g.: @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=547] %142 = icmp eq i8 %141, 105 @rtx_class = constant [153 x i8] c"xxxxxmmmmmmmmxxxxxxxxxxxxmxxxxxxiiixxxxxxxxxxxxxxxxxxxooxooooooxxoooooox3x2c21c2222ccc122222ccccaaaaaa<<<<<<<<<<<<<<<<<<111111111111bbooxxxxxxxxxxcc2211x", align 32 ; <[153 x i8]*> [#uses=543] %165 = icmp eq i8 %164, 60 Also, most of the 59-element arrays (mode_class/rid_to_yy, etc) optimized before are actually range compares. This lets 32-bit machines optimize them. 400.perlbmk has stuff like this: 400.perlbmk: PL_regkind, even for 32-bit: @PL_regkind = constant [62 x i8] c"\00\00\02\02\02\06\06\06\06\09\09\0B\0B\0D\0E\0E\0E\11\12\12\14\14\16\16\18\18\1A\1A\1C\1C\1E\1F !!!$$&'((((,-.///88886789:;8$", align 32 ; <[62 x i8]*> [#uses=4] %811 = icmp ne i8 %810, 33 @PL_utf8skip = constant [256 x i8] 
c"\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\01\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\02\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\03\04\04\04\04\04\04\04\04\05\05\05\05\06\06\07\0D", align 32 ; <[256 x i8]*> [#uses=94] %12 = icmp ult i8 %10, 2 etc. llvm-svn: 92426
This commit is contained in:
parent
e199d2df80
commit
b56bef45f8
|
@ -6053,6 +6053,14 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
|||
// form "i != 47 & i != 87". Same state transitions as for true elements.
|
||||
int FirstFalseElement = Undefined, SecondFalseElement = Undefined;
|
||||
|
||||
/// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
|
||||
/// define a state machine that triggers for ranges of values that the index
|
||||
/// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
|
||||
/// This is -2 when undefined, -3 when overdefined, and otherwise the last
|
||||
/// index in the range (inclusive). We use -2 for undefined here because we
|
||||
/// use relative comparisons and don't want 0-1 to match -1.
|
||||
int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;
|
||||
|
||||
// MagicBitvector - This is a magic bitvector where we set a bit if the
|
||||
// comparison is true for element 'i'. If there are 64 elements or less in
|
||||
// the array, this will fully represent all the comparison results.
|
||||
|
@ -6067,7 +6075,15 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
|||
Init->getOperand(i),
|
||||
CompareRHS, TD);
|
||||
// If the result is undef for this element, ignore it.
|
||||
if (isa<UndefValue>(C)) continue;
|
||||
if (isa<UndefValue>(C)) {
|
||||
// Extend range state machines to cover this element in case there is an
|
||||
// undef in the middle of the range.
|
||||
if (TrueRangeEnd == (int)i-1)
|
||||
TrueRangeEnd = i;
|
||||
if (FalseRangeEnd == (int)i-1)
|
||||
FalseRangeEnd = i;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we can't compute the result for any of the elements, we have to give
|
||||
// up evaluating the entire conditional.
|
||||
|
@ -6077,32 +6093,54 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
|||
// update our state machines.
|
||||
bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();
|
||||
|
||||
// State machine for single index comparison.
|
||||
// State machine for single/double/range index comparison.
|
||||
if (IsTrueForElt) {
|
||||
// Update the TrueElement state machine.
|
||||
if (FirstTrueElement == Undefined)
|
||||
FirstTrueElement = i;
|
||||
else if (SecondTrueElement == Undefined)
|
||||
FirstTrueElement = TrueRangeEnd = i; // First true element.
|
||||
else {
|
||||
// Update double-compare state machine.
|
||||
if (SecondTrueElement == Undefined)
|
||||
SecondTrueElement = i;
|
||||
else
|
||||
SecondTrueElement = Overdefined;
|
||||
|
||||
// Update range state machine.
|
||||
if (TrueRangeEnd == (int)i-1)
|
||||
TrueRangeEnd = i;
|
||||
else
|
||||
TrueRangeEnd = Overdefined;
|
||||
}
|
||||
} else {
|
||||
// Update the FalseElement state machine.
|
||||
if (FirstFalseElement == Undefined)
|
||||
FirstFalseElement = i;
|
||||
else if (SecondFalseElement == Undefined)
|
||||
FirstFalseElement = FalseRangeEnd = i; // First false element.
|
||||
else {
|
||||
// Update double-compare state machine.
|
||||
if (SecondFalseElement == Undefined)
|
||||
SecondFalseElement = i;
|
||||
else
|
||||
SecondFalseElement = Overdefined;
|
||||
|
||||
// Update range state machine.
|
||||
if (FalseRangeEnd == (int)i-1)
|
||||
FalseRangeEnd = i;
|
||||
else
|
||||
FalseRangeEnd = Overdefined;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If this element is in range, update our magic bitvector.
|
||||
if (i < 64 && IsTrueForElt)
|
||||
MagicBitvector |= 1ULL << i;
|
||||
|
||||
// If all of our states become overdefined, bail out early.
|
||||
if (i >= 64 && SecondTrueElement == Overdefined &&
|
||||
SecondFalseElement == Overdefined)
|
||||
// If all of our states become overdefined, bail out early. Since the
|
||||
// predicate is expensive, only check it every 8 elements. This is only
|
||||
// really useful for really huge arrays.
|
||||
if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
|
||||
SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
|
||||
FalseRangeEnd == Overdefined)
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -6110,6 +6148,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
|||
// order the state machines in complexity of the generated code.
|
||||
Value *Idx = GEP->getOperand(2);
|
||||
|
||||
|
||||
// If the comparison is only true for one or two elements, emit direct
|
||||
// comparisons.
|
||||
if (SecondTrueElement != Overdefined) {
|
||||
|
@ -6150,6 +6189,37 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
|||
return BinaryOperator::CreateAnd(C1, C2);
|
||||
}
|
||||
|
||||
// If the comparison can be replaced with a range comparison for the elements
|
||||
// where it is true, emit the range check.
|
||||
if (TrueRangeEnd != Overdefined) {
|
||||
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
|
||||
|
||||
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
|
||||
if (FirstTrueElement) {
|
||||
Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
|
||||
Idx = Builder->CreateAdd(Idx, Offs);
|
||||
}
|
||||
|
||||
Value *End = ConstantInt::get(Idx->getType(),
|
||||
TrueRangeEnd-FirstTrueElement+1);
|
||||
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
|
||||
}
|
||||
|
||||
// False range check.
|
||||
if (FalseRangeEnd != Overdefined) {
|
||||
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
|
||||
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
|
||||
if (FirstFalseElement) {
|
||||
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
|
||||
Idx = Builder->CreateAdd(Idx, Offs);
|
||||
}
|
||||
|
||||
Value *End = ConstantInt::get(Idx->getType(),
|
||||
FalseRangeEnd-FirstFalseElement);
|
||||
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
|
||||
}
|
||||
|
||||
|
||||
// If a 32-bit or 64-bit magic bitvector captures the entire comparison state
|
||||
// of this load, replace it with computation that does:
|
||||
// ((magic_cst >> i) & 1) != 0
|
||||
|
@ -6166,14 +6236,8 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
|
|||
return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
|
||||
}
|
||||
|
||||
// TODO: Range check
|
||||
// TODO: GEP 0, i, 4
|
||||
// TODO: A[i]&4 == 0
|
||||
|
||||
//errs() << "XFORM: " << *GV << "\n";
|
||||
//errs() << "\t" << *GEP << "\n";
|
||||
//errs() << "\t " << ICI << "\n\n\n\n";
|
||||
|
||||
// TODO: GEP 0, i, 4
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
|
||||
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
|
||||
i16 73, i16 82, i16 69, i16 68, i16 0]
|
||||
@GD = internal constant [3 x double] [double 1.0, double 4.0, double -20.0]
|
||||
@GD = internal constant [6 x double]
|
||||
[double -10.0, double 1.0, double 4.0, double 2.0, double -20.0, double -40.0]
|
||||
|
||||
define i1 @test1(i32 %X) {
|
||||
%P = getelementptr [10 x i16]* @G16, i32 0, i32 %X
|
||||
|
@ -25,12 +26,12 @@ define i1 @test2(i32 %X) {
|
|||
}
|
||||
|
||||
define i1 @test3(i32 %X) {
|
||||
%P = getelementptr [3 x double]* @GD, i32 0, i32 %X
|
||||
%P = getelementptr [6 x double]* @GD, i32 0, i32 %X
|
||||
%Q = load double* %P
|
||||
%R = fcmp oeq double %Q, 1.0
|
||||
ret i1 %R
|
||||
; CHECK: @test3
|
||||
; CHECK-NEXT: %R = icmp eq i32 %X, 0
|
||||
; CHECK-NEXT: %R = icmp eq i32 %X, 1
|
||||
; CHECK-NEXT: ret i1 %R
|
||||
}
|
||||
|
||||
|
@ -57,3 +58,25 @@ define i1 @test5(i32 %X) {
|
|||
; CHECK-NEXT: %R = or i1
|
||||
; CHECK-NEXT: ret i1 %R
|
||||
}
|
||||
|
||||
define i1 @test6(i32 %X) {
|
||||
%P = getelementptr [6 x double]* @GD, i32 0, i32 %X
|
||||
%Q = load double* %P
|
||||
%R = fcmp ogt double %Q, 0.0
|
||||
ret i1 %R
|
||||
; CHECK: @test6
|
||||
; CHECK-NEXT: add i32 %X, -1
|
||||
; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
|
||||
; CHECK-NEXT: ret i1 %R
|
||||
}
|
||||
|
||||
define i1 @test7(i32 %X) {
|
||||
%P = getelementptr [6 x double]* @GD, i32 0, i32 %X
|
||||
%Q = load double* %P
|
||||
%R = fcmp olt double %Q, 0.0
|
||||
ret i1 %R
|
||||
; CHECK: @test7
|
||||
; CHECK-NEXT: add i32 %X, -1
|
||||
; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
|
||||
; CHECK-NEXT: ret i1 %R
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue