Build lookup tables for switches (PR884)

This adds a transformation to SimplifyCFG that attemps to turn switch
instructions into loads from lookup tables. It works on switches that
are only used to initialize one or more phi nodes in a common successor
basic block, for example:

  int f(int x) {
    switch (x) {
    case 0: return 5;
    case 1: return 4;
    case 2: return -2;
    case 5: return 7;
    case 6: return 9;
    default: return 42;
  }

This speeds up the code by removing the hard-to-predict jump, and
reduces code size by removing the code for the jump targets.

llvm-svn: 163302
This commit is contained in:
Hans Wennborg 2012-09-06 09:43:28 +00:00
parent 447ff70280
commit 8a62fc5294
3 changed files with 429 additions and 2 deletions

View File

@ -22,6 +22,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/MDBuilder.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
cl::desc("Duplicate return instructions into unconditional branches"));
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
namespace {
/// ValueEqualityComparisonCase - Represents a case of a switch.
@ -2977,6 +2979,287 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
return Changed;
}
/// ValidLookupTableConstant - Return true if the backend will be able to handle
/// initializing an array of constants like C.
bool ValidLookupTableConstant(Constant *C) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
return CE->isGEPWithNoNotionalOverIndexing();
return isa<ConstantFP>(C) ||
isa<ConstantInt>(C) ||
isa<ConstantPointerNull>(C) ||
isa<GlobalValue>(C) ||
isa<UndefValue>(C);
}
/// GetCaseResulsts - Try to determine the resulting constant values in phi
/// nodes at the common destination basic block for one of the case
/// destinations of a switch instruction.
static bool GetCaseResults(SwitchInst *SI,
BasicBlock *CaseDest,
BasicBlock **CommonDest,
SmallVector<std::pair<PHINode*,Constant*>, 4> &Res) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
// If CaseDest is empty, continue to its successor.
if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() &&
!isa<PHINode>(CaseDest->begin())) {
TerminatorInst *Terminator = CaseDest->getTerminator();
if (Terminator->getNumSuccessors() != 1)
return false;
Pred = CaseDest;
CaseDest = Terminator->getSuccessor(0);
}
// If we did not have a CommonDest before, use the current one.
if (!*CommonDest)
*CommonDest = CaseDest;
// If the destination isn't the common one, abort.
if (CaseDest != *CommonDest)
return false;
// Get the values for this case from phi nodes in the destination block.
BasicBlock::iterator I = (*CommonDest)->begin();
while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
int Idx = PHI->getBasicBlockIndex(Pred);
if (Idx == -1)
continue;
Constant *ConstVal = dyn_cast<Constant>(PHI->getIncomingValue(Idx));
if (!ConstVal)
return false;
// Be conservative about which kinds of constants we support.
if (!ValidLookupTableConstant(ConstVal))
return false;
Res.push_back(std::make_pair(PHI, ConstVal));
}
return true;
}
/// BuildLookupTable - Build a lookup table with the contents of Results, using
/// DefaultResult to fill the holes in the table. If the table ends up
/// containing the same result in each element, set *SingleResult to that value
/// and return NULL.
static GlobalVariable *BuildLookupTable(
Module &M,
uint64_t TableSize,
ConstantInt *Offset,
const std::vector<std::pair<ConstantInt*,Constant*> >& Results,
Constant *DefaultResult,
Constant **SingleResult) {
assert(Results.size() && "Need values to build lookup table");
assert(TableSize >= Results.size() && "Table needs to hold all values");
// If all values in the table are equal, this is that value.
Constant *SameResult = Results.begin()->second;
// Build up the table contents.
std::vector<Constant*> TableContents(TableSize);
for (size_t I = 0, E = Results.size(); I != E; ++I) {
ConstantInt *CaseVal = Results[I].first;
Constant *CaseRes = Results[I].second;
uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
TableContents[Idx] = CaseRes;
if (CaseRes != SameResult)
SameResult = NULL;
}
// Fill in any holes in the table with the default result.
if (Results.size() < TableSize) {
for (unsigned i = 0; i < TableSize; ++i) {
if (!TableContents[i])
TableContents[i] = DefaultResult;
}
if (DefaultResult != SameResult)
SameResult = NULL;
}
// Same result was used in the entire table; just return that.
if (SameResult) {
*SingleResult = SameResult;
return NULL;
}
ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize);
Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true,
GlobalVariable::PrivateLinkage,
Initializer,
"switch.table");
GV->setUnnamedAddr(true);
return GV;
}
/// SwitchToLookupTable - If the switch is only used to initialize one or more
/// phi nodes in a common successor block with different constant values,
/// replace the switch with lookup tables.
static bool SwitchToLookupTable(SwitchInst *SI,
IRBuilder<> &Builder) {
assert(SI->getNumCases() > 1 && "Degenerate switch?");
// FIXME: Handle unreachable cases.
// FIXME: If the switch is too sparse for a lookup table, perhaps we could
// split off a dense part and build a lookup table for that.
// FIXME: If the results are all integers and the lookup table would fit in a
// target-legal register, we should store them as a bitmap and use shift/mask
// to look up the result.
// FIXME: This creates arrays of GEPs to constant strings, which means each
// GEP needs a runtime relocation in PIC code. We should just build one big
// string and lookup indices into that.
// Ignore the switch if the number of cases are too small.
// This is similar to the check when building jump tables in
// SelectionDAGBuilder::handleJTSwitchCase.
// FIXME: Determine the best cut-off.
if (SI->getNumCases() < 4)
return false;
// Figure out the corresponding result for each case value and phi node in the
// common destination, as well as the the min and max case values.
assert(SI->case_begin() != SI->case_end());
SwitchInst::CaseIt CI = SI->case_begin();
ConstantInt *MinCaseVal = CI.getCaseValue();
ConstantInt *MaxCaseVal = CI.getCaseValue();
BasicBlock *CommonDest = NULL;
typedef std::vector<std::pair<ConstantInt*, Constant*> > ResultListTy;
SmallDenseMap<PHINode*, ResultListTy> ResultLists;
SmallDenseMap<PHINode*, Constant*> DefaultResults;
SmallDenseMap<PHINode*, Type*> ResultTypes;
SmallVector<PHINode*, 4> PHIs;
for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
ConstantInt *CaseVal = CI.getCaseValue();
if (CaseVal->getValue().slt(MinCaseVal->getValue()))
MinCaseVal = CaseVal;
if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
MaxCaseVal = CaseVal;
// Resulting value at phi nodes for this case value.
typedef SmallVector<std::pair<PHINode*, Constant*>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results))
return false;
// Append the result from this case to the list for each phi.
for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) {
if (!ResultLists.count(I->first))
PHIs.push_back(I->first);
ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second));
}
}
// Get the resulting values for the default case.
{
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList))
return false;
for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) {
PHINode *PHI = DefaultResultsList[I].first;
Constant *Result = DefaultResultsList[I].second;
DefaultResults[PHI] = Result;
ResultTypes[PHI] = Result->getType();
}
}
APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
// The table density should be at lest 40%. This is the same criterion as for
// jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
// FIXME: Find the best cut-off.
// Be careful to avoid overlow in the density computation.
if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1))
return false;
uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
if (SI->getNumCases() * 10 < TableSize * 4)
return false;
// Build the lookup tables.
SmallDenseMap<PHINode*, GlobalVariable*> LookupTables;
SmallDenseMap<PHINode*, Constant*> SingleResults;
Module &Mod = *CommonDest->getParent()->getParent();
for (SmallDenseMap<PHINode*, ResultListTy>::iterator I = ResultLists.begin(),
E = ResultLists.end(); I != E; ++I) {
PHINode *PHI = I->first;
Constant *SingleResult = NULL;
LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, I->second,
DefaultResults[PHI], &SingleResult);
SingleResults[PHI] = SingleResult;
}
// Create the BB that does the lookups.
BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(),
"switch.lookup",
CommonDest->getParent(),
CommonDest);
// Check whether the condition value is within the case range, and branch to
// the new BB.
Builder.SetInsertPoint(SI);
Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
"switch.tableidx");
Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get(
MinCaseVal->getType(), TableSize));
Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
// Populate the BB that does the lookups.
Builder.SetInsertPoint(LookupBB);
bool ReturnedEarly = false;
for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end();
I != E; ++I) {
PHINode *PHI = *I;
// There was a single result for this phi; just use that.
if (Constant *SingleResult = SingleResults[PHI]) {
PHI->addIncoming(SingleResult, LookupBB);
continue;
}
Value *GEPIndices[] = { Builder.getInt32(0), TableIndex };
Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices,
"switch.gep");
Value *Result = Builder.CreateLoad(GEP, "switch.load");
// If the result is only going to be used to return from the function,
// we want to do that right here.
if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->use_begin())) {
if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) {
Builder.CreateRet(Result);
ReturnedEarly = true;
}
}
if (!ReturnedEarly)
PHI->addIncoming(Result, LookupBB);
}
if (!ReturnedEarly)
Builder.CreateBr(CommonDest);
// Remove the switch.
for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) {
BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == SI->getDefaultDest()) continue;
Succ->removePredecessor(SI->getParent());
}
SI->eraseFromParent();
++NumLookupTables;
return true;
}
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If this switch is too complex to want to look at, ignore it.
if (!isValueEqualityComparison(SI))
@ -3016,6 +3299,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (ForwardSwitchConditionToPHI(SI))
return SimplifyCFG(BB) | true;
if (SwitchToLookupTable(SI, Builder))
return SimplifyCFG(BB) | true;
return false;
}

View File

@ -141,8 +141,9 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4
ret i1 %UnifiedRetVal
; CHECK: @test6
; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
; CHECK: %switch.tableidx = sub i32 %tmp.2.i, 14
; CHECK: %0 = icmp ult i32 %switch.tableidx, 6
; CHECK: select i1 %0, i1 true, i1 false
}
define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone {

View File

@ -0,0 +1,140 @@
; RUN: opt < %s -simplifycfg -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; The table for @f
; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
; The float table for @h
; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
; The int table for @h
; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8] c"*\09X\05"
; The table for @foostring
; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
; A simple int-to-int selection switch.
; It is dense enough to be replaced by table lookup.
; The result is directly by a ret from an otherwise empty bb,
; so we return early, directly from the lookup bb.
define i32 @f(i32 %c) nounwind uwtable readnone {
entry:
switch i32 %c, label %sw.default [
i32 42, label %return
i32 43, label %sw.bb1
i32 44, label %sw.bb2
i32 45, label %sw.bb3
i32 46, label %sw.bb4
i32 47, label %sw.bb5
i32 48, label %sw.bb6
]
sw.bb1: br label %return
sw.bb2: br label %return
sw.bb3: br label %return
sw.bb4: br label %return
sw.bb5: br label %return
sw.bb6: br label %return
sw.default: br label %return
return:
%retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
ret i32 %retval.0
; CHECK: @f
; CHECK: entry:
; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i32* %switch.gep
; CHECK-NEXT: ret i32 %switch.load
; CHECK: return:
; CHECK-NEXT: ret i32 15
}
; A switch used to initialize two variables, an i8 and a float.
declare void @dummy(i8 signext, float)
define void @h(i32 %x) {
entry:
switch i32 %x, label %sw.default [
i32 0, label %sw.epilog
i32 1, label %sw.bb1
i32 2, label %sw.bb2
i32 3, label %sw.bb3
]
sw.bb1: br label %sw.epilog
sw.bb2: br label %sw.epilog
sw.bb3: br label %sw.epilog
sw.default: br label %sw.epilog
sw.epilog:
%a.0 = phi i8 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ]
%b.0 = phi float [ 0x4023FAE140000000, %sw.default ], [ 0x4001AE1480000000, %sw.bb3 ], [ 0x4012449BA0000000, %sw.bb2 ], [ 0x3FF3BE76C0000000, %sw.bb1 ], [ 0x40091EB860000000, %entry ]
call void @dummy(i8 signext %a.0, float %b.0)
ret void
; CHECK: @h
; CHECK: entry:
; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
; CHECK: switch.lookup:
; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table2, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8* %switch.gep
; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load2 = load float* %switch.gep1
; CHECK-NEXT: br label %sw.epilog
; CHECK: sw.epilog:
; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ]
; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
; CHECK-NEXT: ret void
}
; Switch used to return a string.
@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
define i8* @foostring(i32 %x) {
entry:
switch i32 %x, label %sw.default [
i32 0, label %return
i32 1, label %sw.bb1
i32 2, label %sw.bb2
i32 3, label %sw.bb3
]
sw.bb1: br label %return
sw.bb2: br label %return
sw.bb3: br label %return
sw.default: br label %return
return:
%retval.0 = phi i8* [ getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0), %sw.default ],
[ getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
[ getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
[ getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
[ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ]
ret i8* %retval.0
; CHECK: @foostring
; CHECK: entry:
; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
; CHECK: switch.lookup:
; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx
; CHECK-NEXT: %switch.load = load i8** %switch.gep
; CHECK-NEXT: ret i8* %switch.load
}