forked from OSchip/llvm-project
[SimplifyCFG] Range reduce switches
If a switch is sparse and all the cases (once sorted) are in arithmetic progression, we can extract the common factor out of the switch and create a dense switch. For example: switch (i) { case 5: ... case 9: ... case 13: ... case 17: ... } can become: if ( (i - 5) % 4 ) goto default; switch ((i - 5) / 4) { case 0: ... case 1: ... case 2: ... case 3: ... } or even better: switch ( ROTR(i - 5, 2) ) { case 0: ... case 1: ... case 2: ... case 3: ... } The division and remainder operations could be costly so we only do this if the factor is a power of two, and emit a right-rotate instead of a divide/remainder sequence. Dense switches can be lowered significantly better than sparse switches and can even be transformed into lookup tables. llvm-svn: 277325
This commit is contained in:
parent
9f0546b5a9
commit
b2e436de42
|
@ -5038,6 +5038,109 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Return true if the case values in Values are dense enough that the switch
// would be a reasonable jump-table candidate, i.e. at least 40% of the
// integers in [Values.front(), Values.back()] are covered by a case.
//
// Values must be non-empty and sorted so that front() is the smallest and
// back() the largest value. Ranges too wide to be measured in 64 bits are
// reported as not dense.
static bool isSwitchDense(ArrayRef<int64_t> Values) {
  // See also SelectionDAGBuilder::isDense(), which this function was based on.

  // Subtract in unsigned arithmetic: the distance between two int64_t values
  // may exceed INT64_MAX, which would be signed overflow.
  const uint64_t Diff =
      static_cast<uint64_t>(Values.back()) - static_cast<uint64_t>(Values.front());
  const uint64_t Range = Diff + 1;
  if (Range < Diff)
    return false; // Range is 2^64 and wrapped to 0; such a switch is sparse.

  const uint64_t NumCases = Values.size();
  // 40% is the default density for building a jump table in optsize/minsize mode.
  const uint64_t MinDensity = 40;

  // Range * MinDensity below must not wrap around, otherwise a huge, sparse
  // range could compare as dense.
  const uint64_t MaxRange = ~static_cast<uint64_t>(0) / MinDensity;
  if (Range > MaxRange)
    return false;

  return NumCases * 100 >= Range * MinDensity;
}
|
||||
|
||||
// Try and transform a switch that has "holes" in it to a contiguous sequence
|
||||
// of cases.
|
||||
//
|
||||
// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
|
||||
// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
|
||||
//
|
||||
// This converts a sparse switch into a dense switch which allows better
|
||||
// lowering and could also allow transforming into a lookup table.
|
||||
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
|
||||
const DataLayout &DL,
|
||||
const TargetTransformInfo &TTI) {
|
||||
auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
|
||||
if (CondTy->getIntegerBitWidth() > 64 ||
|
||||
!DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
|
||||
return false;
|
||||
// Only bother with this optimization if there are more than 3 switch cases;
|
||||
// SDAG will only bother creating jump tables for 4 or more cases.
|
||||
if (SI->getNumCases() < 4)
|
||||
return false;
|
||||
|
||||
// This transform is agnostic to the signedness of the input or case values. We
|
||||
// can treat the case values as signed or unsigned. We can optimize more common
|
||||
// cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
|
||||
// as signed.
|
||||
SmallVector<int64_t,4> Values;
|
||||
for (auto &C : SI->cases())
|
||||
Values.push_back(C.getCaseValue()->getValue().getSExtValue());
|
||||
std::sort(Values.begin(), Values.end());
|
||||
|
||||
// If the switch is already dense, there's nothing useful to do here.
|
||||
if (isSwitchDense(Values))
|
||||
return false;
|
||||
|
||||
// First, transform the values such that they start at zero and ascend.
|
||||
int64_t Base = Values[0];
|
||||
for (auto &V : Values)
|
||||
V -= Base;
|
||||
|
||||
// Now we have signed numbers that have been shifted so that, given enough
|
||||
// precision, there are no negative values. Since the rest of the transform
|
||||
// is bitwise only, we switch now to an unsigned representation.
|
||||
uint64_t GCD = 0;
|
||||
for (auto &V : Values)
|
||||
GCD = llvm::GreatestCommonDivisor64(GCD, (uint64_t)V);
|
||||
|
||||
// This transform can be done speculatively because it is so cheap - it results
|
||||
// in a single rotate operation being inserted. This can only happen if the
|
||||
// factor extracted is a power of 2.
|
||||
// FIXME: If the GCD is an odd number we can multiply by the multiplicative
|
||||
// inverse of GCD and then perform this transform.
|
||||
// FIXME: It's possible that optimizing a switch on powers of two might also
|
||||
// be beneficial - flag values are often powers of two and we could use a CLZ
|
||||
// as the key function.
|
||||
if (GCD <= 1 || !llvm::isPowerOf2_64(GCD))
|
||||
// No common divisor found or too expensive to compute key function.
|
||||
return false;
|
||||
|
||||
unsigned Shift = llvm::Log2_64(GCD);
|
||||
for (auto &V : Values)
|
||||
V = (int64_t)((uint64_t)V >> Shift);
|
||||
|
||||
if (!isSwitchDense(Values))
|
||||
// Transform didn't create a dense switch.
|
||||
return false;
|
||||
|
||||
// The obvious transform is to shift the switch condition right and emit a
|
||||
// check that the condition actually cleanly divided by GCD, i.e.
|
||||
// C & (1 << Shift - 1) == 0
|
||||
// inserting a new CFG edge to handle the case where it didn't divide cleanly.
|
||||
//
|
||||
// A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
|
||||
// shift and puts the shifted-off bits in the uppermost bits. If any of these
|
||||
// are nonzero then the switch condition will be very large and will hit the
|
||||
// default case.
|
||||
|
||||
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
|
||||
Builder.SetInsertPoint(SI);
|
||||
auto *ShiftC = ConstantInt::get(Ty, Shift);
|
||||
auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
|
||||
auto *Rot = Builder.CreateOr(Builder.CreateLShr(Sub, ShiftC),
|
||||
Builder.CreateShl(Sub, Ty->getBitWidth() - Shift));
|
||||
SI->replaceUsesOfWith(SI->getCondition(), Rot);
|
||||
|
||||
for (auto &C : SI->cases()) {
|
||||
auto *Orig = C.getCaseValue();
|
||||
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
|
||||
SI->replaceUsesOfWith(Orig,
|
||||
ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
|
||||
BasicBlock *BB = SI->getParent();
|
||||
|
||||
|
@ -5081,6 +5184,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
|
|||
if (SwitchToLookupTable(SI, Builder, DL, TTI))
|
||||
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
||||
|
||||
if (ReduceSwitchRange(SI, Builder, DL, TTI))
|
||||
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
; RUN: opt < %s -simplifycfg -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-n32"
|
||||
|
||||
; CHECK-LABEL: @test1
|
||||
; CHECK: %1 = sub i32 %a, 97
|
||||
; CHECK: %2 = lshr i32 %1, 2
|
||||
; CHECK: %3 = shl i32 %1, 30
|
||||
; CHECK: %4 = or i32 %2, %3
|
||||
; CHECK: switch i32 %4, label %def [
|
||||
; CHECK: i32 0, label %one
|
||||
; CHECK: i32 1, label %two
|
||||
; CHECK: i32 2, label %three
|
||||
; CHECK: ]
|
||||
define i32 @test1(i32 %a) {
|
||||
switch i32 %a, label %def [
|
||||
i32 97, label %one
|
||||
i32 101, label %two
|
||||
i32 105, label %three
|
||||
i32 109, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i32 8867
|
||||
|
||||
one:
|
||||
ret i32 11984
|
||||
two:
|
||||
ret i32 1143
|
||||
three:
|
||||
ret i32 99783
|
||||
}
|
||||
|
||||
; Optimization shouldn't trigger; bitwidth > 64
|
||||
; CHECK-LABEL: @test2
|
||||
; CHECK: switch i128 %a, label %def
|
||||
define i128 @test2(i128 %a) {
|
||||
switch i128 %a, label %def [
|
||||
i128 97, label %one
|
||||
i128 101, label %two
|
||||
i128 105, label %three
|
||||
i128 109, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i128 8867
|
||||
|
||||
one:
|
||||
ret i128 11984
|
||||
two:
|
||||
ret i128 1143
|
||||
three:
|
||||
ret i128 99783
|
||||
}
|
||||
|
||||
|
||||
; Optimization shouldn't trigger; no holes present
|
||||
; CHECK-LABEL: @test3
|
||||
; CHECK: switch i32 %a, label %def
|
||||
define i32 @test3(i32 %a) {
|
||||
switch i32 %a, label %def [
|
||||
i32 97, label %one
|
||||
i32 98, label %two
|
||||
i32 99, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i32 8867
|
||||
|
||||
one:
|
||||
ret i32 11984
|
||||
two:
|
||||
ret i32 1143
|
||||
three:
|
||||
ret i32 99783
|
||||
}
|
||||
|
||||
; Optimization shouldn't trigger; not an arithmetic progression
|
||||
; CHECK-LABEL: @test4
|
||||
; CHECK: switch i32 %a, label %def
|
||||
define i32 @test4(i32 %a) {
|
||||
switch i32 %a, label %def [
|
||||
i32 97, label %one
|
||||
i32 102, label %two
|
||||
i32 105, label %three
|
||||
i32 109, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i32 8867
|
||||
|
||||
one:
|
||||
ret i32 11984
|
||||
two:
|
||||
ret i32 1143
|
||||
three:
|
||||
ret i32 99783
|
||||
}
|
||||
|
||||
; Optimization shouldn't trigger; not a power of two
|
||||
; CHECK-LABEL: @test5
|
||||
; CHECK: switch i32 %a, label %def
|
||||
define i32 @test5(i32 %a) {
|
||||
switch i32 %a, label %def [
|
||||
i32 97, label %one
|
||||
i32 102, label %two
|
||||
i32 107, label %three
|
||||
i32 112, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i32 8867
|
||||
|
||||
one:
|
||||
ret i32 11984
|
||||
two:
|
||||
ret i32 1143
|
||||
three:
|
||||
ret i32 99783
|
||||
}
|
||||
|
||||
; Negative case values: treated as signed, so the smallest case (-109) is the base.
; CHECK-LABEL: @test6
|
||||
; CHECK: %1 = sub i32 %a, -109
|
||||
; CHECK: %2 = lshr i32 %1, 2
|
||||
; CHECK: %3 = shl i32 %1, 30
|
||||
; CHECK: %4 = or i32 %2, %3
|
||||
; CHECK: switch i32 %4, label %def [
|
||||
define i32 @test6(i32 %a) optsize {
|
||||
switch i32 %a, label %def [
|
||||
i32 -97, label %one
|
||||
i32 -101, label %two
|
||||
i32 -105, label %three
|
||||
i32 -109, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i32 8867
|
||||
|
||||
one:
|
||||
ret i32 11984
|
||||
two:
|
||||
ret i32 1143
|
||||
three:
|
||||
ret i32 99783
|
||||
}
|
||||
|
||||
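; i8 case values 220..232 are -36..-24 when read as signed; the reduced switch
; is then expected to become a lookup table (switch.tableidx).
; CHECK-LABEL: @test7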
|
||||
; CHECK: %1 = sub i8 %a, -36
|
||||
; CHECK: %2 = lshr i8 %1, 2
|
||||
; CHECK: %3 = shl i8 %1, 6
|
||||
; CHECK: %4 = or i8 %2, %3
|
||||
; CHECK: switch.tableidx = {{.*}} %4
|
||||
define i8 @test7(i8 %a) optsize {
|
||||
switch i8 %a, label %def [
|
||||
i8 220, label %one
|
||||
i8 224, label %two
|
||||
i8 228, label %three
|
||||
i8 232, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i8 8867
|
||||
|
||||
one:
|
||||
ret i8 11984
|
||||
two:
|
||||
ret i8 1143
|
||||
three:
|
||||
ret i8 99783
|
||||
}
|
||||
|
||||
; A progression with one element missing (109 is absent) is still expected to be
; range-reduced.
; CHECK-LABEL: @test8
|
||||
; CHECK: %1 = sub i32 %a, 97
|
||||
; CHECK: %2 = lshr i32 %1, 2
|
||||
; CHECK: %3 = shl i32 %1, 30
|
||||
; CHECK: %4 = or i32 %2, %3
|
||||
; CHECK: switch i32 %4, label %def [
|
||||
define i32 @test8(i32 %a) optsize {
|
||||
switch i32 %a, label %def [
|
||||
i32 97, label %one
|
||||
i32 101, label %two
|
||||
i32 105, label %three
|
||||
i32 113, label %three
|
||||
]
|
||||
|
||||
def:
|
||||
ret i32 8867
|
||||
|
||||
one:
|
||||
ret i32 11984
|
||||
two:
|
||||
ret i32 1143
|
||||
three:
|
||||
ret i32 99783
|
||||
}
|
Loading…
Reference in New Issue