From 8a62fc52942f8c5e551c53345780aa9996051da5 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Thu, 6 Sep 2012 09:43:28 +0000 Subject: [PATCH] Build lookup tables for switches (PR884) This adds a transformation to SimplifyCFG that attemps to turn switch instructions into loads from lookup tables. It works on switches that are only used to initialize one or more phi nodes in a common successor basic block, for example: int f(int x) { switch (x) { case 0: return 5; case 1: return 4; case 2: return -2; case 5: return 7; case 6: return 9; default: return 42; } This speeds up the code by removing the hard-to-predict jump, and reduces code size by removing the code for the jump targets. llvm-svn: 163302 --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 286 ++++++++++++++++++ .../Transforms/SimplifyCFG/switch_create.ll | 5 +- .../SimplifyCFG/switch_to_lookup_table.ll | 140 +++++++++ 3 files changed, 429 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6cd3bbc4d8b6..62b98cb8fcae 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,6 +22,7 @@ #include "llvm/LLVMContext.h" #include "llvm/MDBuilder.h" #include "llvm/Metadata.h" +#include "llvm/Module.h" #include "llvm/Operator.h" #include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" @@ -54,6 +55,7 @@ DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -2977,6 +2979,287 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { return Changed; } +/// ValidLookupTableConstant - Return true if the backend will be able to handle +/// initializing an array of constants like C. +bool ValidLookupTableConstant(Constant *C) { + if (ConstantExpr *CE = dyn_cast(C)) + return CE->isGEPWithNoNotionalOverIndexing(); + + return isa(C) || + isa(C) || + isa(C) || + isa(C) || + isa(C); +} + +/// GetCaseResulsts - Try to determine the resulting constant values in phi +/// nodes at the common destination basic block for one of the case +/// destinations of a switch instruction. +static bool GetCaseResults(SwitchInst *SI, + BasicBlock *CaseDest, + BasicBlock **CommonDest, + SmallVector, 4> &Res) { + // The block from which we enter the common destination. + BasicBlock *Pred = SI->getParent(); + + // If CaseDest is empty, continue to its successor. + if (CaseDest->getFirstNonPHIOrDbg() == CaseDest->getTerminator() && + !isa(CaseDest->begin())) { + + TerminatorInst *Terminator = CaseDest->getTerminator(); + if (Terminator->getNumSuccessors() != 1) + return false; + + Pred = CaseDest; + CaseDest = Terminator->getSuccessor(0); + } + + // If we did not have a CommonDest before, use the current one. + if (!*CommonDest) + *CommonDest = CaseDest; + // If the destination isn't the common one, abort. + if (CaseDest != *CommonDest) + return false; + + // Get the values for this case from phi nodes in the destination block. + BasicBlock::iterator I = (*CommonDest)->begin(); + while (PHINode *PHI = dyn_cast(I++)) { + int Idx = PHI->getBasicBlockIndex(Pred); + if (Idx == -1) + continue; + + Constant *ConstVal = dyn_cast(PHI->getIncomingValue(Idx)); + if (!ConstVal) + return false; + + // Be conservative about which kinds of constants we support. + if (!ValidLookupTableConstant(ConstVal)) + return false; + + Res.push_back(std::make_pair(PHI, ConstVal)); + } + + return true; +} + +/// BuildLookupTable - Build a lookup table with the contents of Results, using +/// DefaultResult to fill the holes in the table. If the table ends up +/// containing the same result in each element, set *SingleResult to that value +/// and return NULL. +static GlobalVariable *BuildLookupTable( + Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const std::vector >& Results, + Constant *DefaultResult, + Constant **SingleResult) { + assert(Results.size() && "Need values to build lookup table"); + assert(TableSize >= Results.size() && "Table needs to hold all values"); + + // If all values in the table are equal, this is that value. + Constant *SameResult = Results.begin()->second; + + // Build up the table contents. + std::vector TableContents(TableSize); + for (size_t I = 0, E = Results.size(); I != E; ++I) { + ConstantInt *CaseVal = Results[I].first; + Constant *CaseRes = Results[I].second; + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + TableContents[Idx] = CaseRes; + + if (CaseRes != SameResult) + SameResult = NULL; + } + + // Fill in any holes in the table with the default result. + if (Results.size() < TableSize) { + for (unsigned i = 0; i < TableSize; ++i) { + if (!TableContents[i]) + TableContents[i] = DefaultResult; + } + + if (DefaultResult != SameResult) + SameResult = NULL; + } + + // Same result was used in the entire table; just return that. + if (SameResult) { + *SingleResult = SameResult; + return NULL; + } + + ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); + + GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + GV->setUnnamedAddr(true); + return GV; +} + +/// SwitchToLookupTable - If the switch is only used to initialize one or more +/// phi nodes in a common successor block with different constant values, +/// replace the switch with lookup tables. +static bool SwitchToLookupTable(SwitchInst *SI, + IRBuilder<> &Builder) { + assert(SI->getNumCases() > 1 && "Degenerate switch?"); + // FIXME: Handle unreachable cases. + + // FIXME: If the switch is too sparse for a lookup table, perhaps we could + // split off a dense part and build a lookup table for that. + + // FIXME: If the results are all integers and the lookup table would fit in a + // target-legal register, we should store them as a bitmap and use shift/mask + // to look up the result. + + // FIXME: This creates arrays of GEPs to constant strings, which means each + // GEP needs a runtime relocation in PIC code. We should just build one big + // string and lookup indices into that. + + // Ignore the switch if the number of cases are too small. + // This is similar to the check when building jump tables in + // SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Determine the best cut-off. + if (SI->getNumCases() < 4) + return false; + + // Figure out the corresponding result for each case value and phi node in the + // common destination, as well as the the min and max case values. + assert(SI->case_begin() != SI->case_end()); + SwitchInst::CaseIt CI = SI->case_begin(); + ConstantInt *MinCaseVal = CI.getCaseValue(); + ConstantInt *MaxCaseVal = CI.getCaseValue(); + + BasicBlock *CommonDest = NULL; + typedef std::vector > ResultListTy; + SmallDenseMap ResultLists; + SmallDenseMap DefaultResults; + SmallDenseMap ResultTypes; + SmallVector PHIs; + + for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { + ConstantInt *CaseVal = CI.getCaseValue(); + if (CaseVal->getValue().slt(MinCaseVal->getValue())) + MinCaseVal = CaseVal; + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) + MaxCaseVal = CaseVal; + + // Resulting value at phi nodes for this case value. + typedef SmallVector, 4> ResultsTy; + ResultsTy Results; + if (!GetCaseResults(SI, CI.getCaseSuccessor(), &CommonDest, Results)) + return false; + + // Append the result from this case to the list for each phi. + for (ResultsTy::iterator I = Results.begin(), E = Results.end(); I!=E; ++I) { + if (!ResultLists.count(I->first)) + PHIs.push_back(I->first); + ResultLists[I->first].push_back(std::make_pair(CaseVal, I->second)); + } + } + + // Get the resulting values for the default case. + { + SmallVector, 4> DefaultResultsList; + if (!GetCaseResults(SI, SI->getDefaultDest(), &CommonDest, DefaultResultsList)) + return false; + for (size_t I = 0, E = DefaultResultsList.size(); I != E; ++I) { + PHINode *PHI = DefaultResultsList[I].first; + Constant *Result = DefaultResultsList[I].second; + DefaultResults[PHI] = Result; + ResultTypes[PHI] = Result->getType(); + } + } + + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + // The table density should be at lest 40%. This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + // Be careful to avoid overlow in the density computation. + if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) + return false; + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; + if (SI->getNumCases() * 10 < TableSize * 4) + return false; + + // Build the lookup tables. + SmallDenseMap LookupTables; + SmallDenseMap SingleResults; + + Module &Mod = *CommonDest->getParent()->getParent(); + for (SmallDenseMap::iterator I = ResultLists.begin(), + E = ResultLists.end(); I != E; ++I) { + PHINode *PHI = I->first; + + Constant *SingleResult = NULL; + LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, I->second, + DefaultResults[PHI], &SingleResult); + SingleResults[PHI] = SingleResult; + } + + // Create the BB that does the lookups. + BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), + "switch.lookup", + CommonDest->getParent(), + CommonDest); + + // Check whether the condition value is within the case range, and branch to + // the new BB. + Builder.SetInsertPoint(SI); + Value *TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); + Value *Cmp = Builder.CreateICmpULT(TableIndex, ConstantInt::get( + MinCaseVal->getType(), TableSize)); + Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + + // Populate the BB that does the lookups. + Builder.SetInsertPoint(LookupBB); + bool ReturnedEarly = false; + for (SmallVector::iterator I = PHIs.begin(), E = PHIs.end(); + I != E; ++I) { + PHINode *PHI = *I; + // There was a single result for this phi; just use that. + if (Constant *SingleResult = SingleResults[PHI]) { + PHI->addIncoming(SingleResult, LookupBB); + continue; + } + + Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; + Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, + "switch.gep"); + Value *Result = Builder.CreateLoad(GEP, "switch.load"); + + // If the result is only going to be used to return from the function, + // we want to do that right here. + if (PHI->hasOneUse() && isa(*PHI->use_begin())) { + if (CommonDest->getFirstNonPHIOrDbg() == CommonDest->getTerminator()) { + Builder.CreateRet(Result); + ReturnedEarly = true; + } + } + + if (!ReturnedEarly) + PHI->addIncoming(Result, LookupBB); + } + + if (!ReturnedEarly) + Builder.CreateBr(CommonDest); + + // Remove the switch. + for (unsigned i = 0; i < SI->getNumSuccessors(); ++i) { + BasicBlock *Succ = SI->getSuccessor(i); + if (Succ == SI->getDefaultDest()) continue; + Succ->removePredecessor(SI->getParent()); + } + SI->eraseFromParent(); + + ++NumLookupTables; + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // If this switch is too complex to want to look at, ignore it. if (!isValueEqualityComparison(SI)) @@ -3016,6 +3299,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; + if (SwitchToLookupTable(SI, Builder)) + return SimplifyCFG(BB) | true; + return false; } diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll index 546cc75f2973..b28e4a45509a 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll @@ -141,8 +141,9 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4 ret i1 %UnifiedRetVal ; CHECK: @test6 -; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14 -; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6 +; CHECK: %switch.tableidx = sub i32 %tmp.2.i, 14 +; CHECK: %0 = icmp ult i32 %switch.tableidx, 6 +; CHECK: select i1 %0, i1 true, i1 false } define void @test7(i8 zeroext %c, i32 %x) nounwind ssp noredzone { diff --git a/llvm/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll new file mode 100644 index 000000000000..c19982139411 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch_to_lookup_table.ll @@ -0,0 +1,140 @@ +; RUN: opt < %s -simplifycfg -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The table for @f +; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1] + +; The float table for @h +; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000] + +; The int table for @h +; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8] c"*\09X\05" + +; The table for @foostring +; CHECK: @switch.table3 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)] + +; A simple int-to-int selection switch. +; It is dense enough to be replaced by table lookup. +; The result is directly by a ret from an otherwise empty bb, +; so we return early, directly from the lookup bb. + +define i32 @f(i32 %c) nounwind uwtable readnone { +entry: + switch i32 %c, label %sw.default [ + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 + i32 46, label %sw.bb4 + i32 47, label %sw.bb5 + i32 48, label %sw.bb6 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.bb4: br label %return +sw.bb5: br label %return +sw.bb6: br label %return +sw.default: br label %return +return: + %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ] + ret i32 %retval.0 + +; CHECK: @f +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i32* %switch.gep +; CHECK-NEXT: ret i32 %switch.load +; CHECK: return: +; CHECK-NEXT: ret i32 15 +} + +; A switch used to initialize two variables, an i8 and a float. + +declare void @dummy(i8 signext, float) +define void @h(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %sw.epilog + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb1: br label %sw.epilog +sw.bb2: br label %sw.epilog +sw.bb3: br label %sw.epilog +sw.default: br label %sw.epilog + +sw.epilog: + %a.0 = phi i8 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ] + %b.0 = phi float [ 0x4023FAE140000000, %sw.default ], [ 0x4001AE1480000000, %sw.bb3 ], [ 0x4012449BA0000000, %sw.bb2 ], [ 0x3FF3BE76C0000000, %sw.bb1 ], [ 0x40091EB860000000, %entry ] + call void @dummy(i8 signext %a.0, float %b.0) + ret void + +; CHECK: @h +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8]* @switch.table2, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i8* %switch.gep +; CHECK-NEXT: %switch.gep1 = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load2 = load float* %switch.gep1 +; CHECK-NEXT: br label %sw.epilog +; CHECK: sw.epilog: +; CHECK-NEXT: %a.0 = phi i8 [ %switch.load, %switch.lookup ], [ 7, %entry ] +; CHECK-NEXT: %b.0 = phi float [ %switch.load2, %switch.lookup ], [ 0x4023FAE140000000, %entry ] +; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0) +; CHECK-NEXT: ret void +} + + +; Switch used to return a string. + +@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1 +@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1 +@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1 +@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1 + +define i8* @foostring(i32 %x) { +entry: + switch i32 %x, label %sw.default [ + i32 0, label %return + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb1: br label %return +sw.bb2: br label %return +sw.bb3: br label %return +sw.default: br label %return + +return: + %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0), %sw.default ], + [ getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ], + [ getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ], + [ getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ], + [ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ] + ret i8* %retval.0 + +; CHECK: @foostring +; CHECK: entry: +; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0 +; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4 +; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return +; CHECK: switch.lookup: +; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table3, i32 0, i32 %switch.tableidx +; CHECK-NEXT: %switch.load = load i8** %switch.gep +; CHECK-NEXT: ret i8* %switch.load +}