diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 85d5a5ae4b90..e902a725659a 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -51,14 +51,14 @@ enum ZeroBehavior { namespace detail { template struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior) { + static std::size_t count(T Val, ZeroBehavior) { if (!Val) return std::numeric_limits::digits; if (Val & 0x1) return 0; // Bisection method. - unsigned ZeroBits = 0; + std::size_t ZeroBits = 0; T Shift = std::numeric_limits::digits >> 1; T Mask = std::numeric_limits::max() >> Shift; while (Shift) { @@ -75,7 +75,7 @@ template struct TrailingZerosCounter { #if __GNUC__ >= 4 || defined(_MSC_VER) template struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; @@ -91,7 +91,7 @@ template struct TrailingZerosCounter { #if !defined(_MSC_VER) || defined(_M_X64) template struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 64; @@ -116,7 +116,7 @@ template struct TrailingZerosCounter { /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are /// valid arguments. template -unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -125,12 +125,12 @@ unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { namespace detail { template struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior) { + static std::size_t count(T Val, ZeroBehavior) { if (!Val) return std::numeric_limits::digits; // Bisection method. - unsigned ZeroBits = 0; + std::size_t ZeroBits = 0; for (T Shift = std::numeric_limits::digits >> 1; Shift; Shift >>= 1) { T Tmp = Val >> Shift; if (Tmp) @@ -144,7 +144,7 @@ template struct LeadingZerosCounter { #if __GNUC__ >= 4 || defined(_MSC_VER) template struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; @@ -160,7 +160,7 @@ template struct LeadingZerosCounter { #if !defined(_MSC_VER) || defined(_M_X64) template struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 64; @@ -185,7 +185,7 @@ template struct LeadingZerosCounter { /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are /// valid arguments. template -unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -458,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) { /// \param ZB the behavior on an input of all ones. Only ZB_Width and /// ZB_Undefined are valid arguments. template -unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { +std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -474,7 +474,7 @@ unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { /// \param ZB the behavior on an input of all ones. Only ZB_Width and /// ZB_Undefined are valid arguments. template -unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { +std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0b0dd0ae28b3..0e8a517d1d64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1147,7 +1147,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. - Out.kernarg_segment_alignment = std::max(4, + Out.kernarg_segment_alignment = std::max((size_t)4, countTrailingZeros(MaxKernArgAlign)); } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 524c3708e7bd..90b552035af3 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5309,9 +5309,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { ConstantInt *CaseVal = CI->getCaseValue(); - if (CaseVal->getValue().ult(MinCaseVal->getValue())) + if (CaseVal->getValue().slt(MinCaseVal->getValue())) MinCaseVal = CaseVal; - if (CaseVal->getValue().ugt(MaxCaseVal->getValue())) + if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. @@ -5337,7 +5337,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, } uint64_t NumResults = ResultLists[PHIs[0]].size(); - uint64_t TableSize = MaxCaseVal->getValue().getLimitedValue() + 1; + APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); + uint64_t TableSize = RangeSpread.getLimitedValue() + 1; bool TableHasHoles = (NumResults < TableSize); // If the table has holes, we need a constant result for the default case @@ -5372,7 +5373,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // Compute the table index value. Builder.SetInsertPoint(SI); - Value *TableIndex = SI->getCondition(); + Value *TableIndex; + if (MinCaseVal->isNullValue()) + TableIndex = SI->getCondition(); + else + TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, + "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -5412,10 +5418,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); - // When doing the register-sized hole-check, unconditionally use a - // subtraction. - TableIndex = Builder.CreateSub(TableIndex, MinCaseVal); - // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid // unnecessary illegal types. uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); @@ -5459,11 +5461,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; StringRef FuncName = Fn->getName(); - // Base is 0 unless using a hole check - ConstantInt *Base = - NeedMask ? MinCaseVal - : ConstantInt::get(Mod.getContext(), APInt(CaseSize, 0)); - SwitchLookupTable Table(Mod, TableSize, Base, ResultList, DV, DL, FuncName); + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL, + FuncName); Value *Result = Table.BuildLookup(TableIndex, Builder); @@ -5508,6 +5507,17 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, return true; } +static bool isSwitchDense(ArrayRef Values) { + // See also SelectionDAGBuilder::isDense(), which this function was based on. + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + uint64_t NumCases = Values.size(); + // 40% is the default density for building a jump table in optsize/minsize mode. + uint64_t MinDensity = 40; + + return NumCases * 100 >= Range * MinDensity; +} + /// Try to transform a switch that has "holes" in it to a contiguous sequence /// of cases. /// @@ -5519,83 +5529,58 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI) { - // The number of cases that need to be removed by a subtraction operation - // to make it worth using. - const unsigned SubThreshold = (SI->getFunction()->hasOptSize() ? 2 : 8); auto *CondTy = cast(SI->getCondition()->getType()); - unsigned BitWidth = CondTy->getIntegerBitWidth(); - if (BitWidth > 64 || !DL.fitsInLegalInteger(BitWidth)) + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) return false; // Only bother with this optimization if there are more than 3 switch cases; // SDAG will only bother creating jump tables for 4 or more cases. - // This is also useful when using the LowerSwitch transform, but not with - // so few cases. if (SI->getNumCases() < 4) return false; - // We organize the range to start from 0, if it is not already close. - SmallVector Values; + // This transform is agnostic to the signedness of the input or case values. We + // can treat the case values as signed or unsigned. We can optimize more common + // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values + // as signed. + SmallVector Values; for (auto &C : SI->cases()) - Values.push_back(C.getCaseValue()->getValue().getLimitedValue()); + Values.push_back(C.getCaseValue()->getValue().getSExtValue()); llvm::sort(Values); - bool MadeChanges = false; - // We must first look find the best start point, for example if we have a - // series that crosses zero: -2, -1, 0, 1, 2. - uint64_t BestDistance = - APInt::getMaxValue(CondTy->getIntegerBitWidth()).getLimitedValue() - - Values.back() + Values.front() + 1; - unsigned BestIndex = 0; - for (unsigned I = 1, E = Values.size(); I != E; I++) { - if (Values[I] - Values[I - 1] > BestDistance) { - BestIndex = I; - BestDistance = Values[I] - Values[I - 1]; - } - } + // If the switch is already dense, there's nothing useful to do here. + if (isSwitchDense(Values)) + return false; - // This transform can be done speculatively because it is so cheap - it - // results in a single rotate operation being inserted. + // First, transform the values such that they start at zero and ascend. + int64_t Base = Values[0]; + for (auto &V : Values) + V -= (uint64_t)(Base); + + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + uint64_t GCD = 0; + for (auto &V : Values) + GCD = GreatestCommonDivisor64(GCD, (uint64_t)V); + + // This transform can be done speculatively because it is so cheap - it results + // in a single rotate operation being inserted. This can only happen if the + // factor extracted is a power of 2. + // FIXME: If the GCD is an odd number we can multiply by the multiplicative + // inverse of GCD and then perform this transform. // FIXME: It's possible that optimizing a switch on powers of two might also // be beneficial - flag values are often powers of two and we could use a CLZ // as the key function. + if (GCD <= 1 || !isPowerOf2_64(GCD)) + // No common divisor found or too expensive to compute key function. + return false; - // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than - // one element and LLVM disallows duplicate cases, Shift is guaranteed to be - // less than 64. - unsigned Shift = 64; - // We need to store this from _before_ the transform - uint64_t BestIndexXor = Values[BestIndex]; + unsigned Shift = Log2_64(GCD); for (auto &V : Values) - Shift = std::min(Shift, countTrailingZeros(V ^ BestIndexXor)); - assert(Shift < 64); - if (Shift > 0) { - MadeChanges = true; - for (auto &V : Values) - V >>= Shift; - } + V = (int64_t)((uint64_t)V >> Shift); - // We Xor against Values[] (any element will do) because the if we do not - // start at zero, but also don't meet the SubThreshold, then we still might - // share common rights bits, and if this transform succeeds - // then we should insert the subtraction anyways, because the rotate trick - // below to avoid a branch needs the shifted away bits to be zero. - - // Now transform the values such that they start at zero and ascend. Do not - // do this if the shift reduces the lowest value to less than SubThreshold, - // or if the subtraction is less than SubThreshold and it does not enable a - // rotate. - uint64_t Base = 0; - if ((BestIndexXor >= SubThreshold && Shift == 0) || - (Shift > countTrailingZeros(BestIndexXor) && - Values[BestIndex] >= SubThreshold)) { - Base = BestIndexXor; - MadeChanges = true; - for (auto &V : Values) - V = (APInt(BitWidth, V) - Base).getLimitedValue(); - } - - if (!MadeChanges) - // We didn't do anything. + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. return false; // The obvious transform is to shift the switch condition right and emit a @@ -5610,22 +5595,18 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, auto *Ty = cast(SI->getCondition()->getType()); Builder.SetInsertPoint(SI); - Value *Key = SI->getCondition(); - if (Base > 0) - Key = Builder.CreateSub(Key, ConstantInt::get(Ty, Base)); - if (Shift > 0) { - // FIXME replace with fshr? - auto *ShiftC = ConstantInt::get(Ty, Shift); - auto *LShr = Builder.CreateLShr(Key, ShiftC); - auto *Shl = Builder.CreateShl(Key, Ty->getBitWidth() - Shift); - Key = Builder.CreateOr(LShr, Shl); - } - SI->replaceUsesOfWith(SI->getCondition(), Key); + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *LShr = Builder.CreateLShr(Sub, ShiftC); + auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift); + auto *Rot = Builder.CreateOr(LShr, Shl); + SI->replaceUsesOfWith(SI->getCondition(), Rot); for (auto Case : SI->cases()) { auto *Orig = Case.getCaseValue(); - auto Sub = Orig->getValue() - Base; - Case.setValue(cast(ConstantInt::get(Ty, Sub.lshr(Shift)))); + auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + Case.setValue( + cast(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue())))); } return true; } @@ -5666,9 +5647,6 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) return requestResimplify(); - if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return requestResimplify(); - // The conversion from switch to lookup tables results in difficult-to-analyze // code and makes pruning branches much harder. This is a problem if the // switch expression itself can still be restricted as a result of inlining or @@ -5678,6 +5656,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { SwitchToLookupTable(SI, Builder, DL, TTI)) return requestResimplify(); + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return requestResimplify(); + return false; } diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll index 22f5e9f3cc1d..ffcf2175091f 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll @@ -1,14 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg -mtriple=arm -mattr=+v6t2 < %s | FileCheck %s define i32 @ctlz(i32 %A) { ; CHECK-LABEL: @ctlz( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true) -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; CHECK-NEXT: ret i32 [[SPEC_SELECT]] -; +; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) +; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]] +; CHECK-NEXT: ret i32 [[SEL]] entry: %tobool = icmp eq i32 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -24,12 +21,10 @@ cond.end: define i32 @cttz(i32 %A) { ; CHECK-LABEL: @cttz( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true) -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; CHECK-NEXT: ret i32 [[SPEC_SELECT]] -; +; CHECK: [[ICMP:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; CHECK-NEXT: [[CTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) +; CHECK-NEXT: [[SEL:%[A-Za-z0-9.]+]] = select i1 [[ICMP]], i32 32, i32 [[CTZ]] +; CHECK-NEXT: ret i32 [[SEL]] entry: %tobool = icmp eq i32 %A, 0 br i1 %tobool, label %cond.end, label %cond.true diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll b/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll index 9218ee185b1e..9484de77db48 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/select-trunc-i64.ll @@ -1,20 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ;RUN: opt -S -simplifycfg -mtriple=arm < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +; CHECK-LABEL: select_trunc_i64 +; CHECK-NOT: br +; CHECK: select +; CHECK: select define arm_aapcscc i32 @select_trunc_i64(i32 %a, i32 %b) { -; CHECK-LABEL: @select_trunc_i64( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV1]], [[CONV]] -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[ADD]], 2147483647 -; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[ADD]], -2147483648 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[TMP0]], i64 [[ADD]], i64 -2147483648 -; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i64 [[COND]] to i32 -; CHECK-NEXT: [[COND8_OFF0:%.*]] = select i1 [[CMP]], i32 2147483647, i32 [[EXTRACT_T]] -; CHECK-NEXT: ret i32 [[COND8_OFF0]] -; entry: %conv = sext i32 %a to i64 %conv1 = sext i32 %b to i64 diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll index f67737173426..453a76864032 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table-constant-expr.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" @@ -16,28 +15,11 @@ target triple = "armv7a--none-eabi" @g4 = external thread_local global i32, align 4 define i32* @test3(i32 %n) { -; CHECK-LABEL: @test3( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[RETURN:%.*]] -; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] -; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] -; CHECK-NEXT: ] -; CHECK: sw.bb1: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.bb2: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.default: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @g4, [[SW_DEFAULT]] ], [ getelementptr inbounds (i32, i32* inttoptr (i32 mul (i32 ptrtoint (i32* @g3 to i32), i32 2) to i32*), i32 1), [[SW_BB2]] ], [ @g2, [[SW_BB1]] ], [ @g1, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32* [[RETVAL_0]] -; entry: switch i32 %n, label %sw.default [ - i32 0, label %sw.bb - i32 1, label %sw.bb1 - i32 2, label %sw.bb2 + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 ] sw.bb: diff --git a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll index b6573de49754..501bc31bd0dd 100644 --- a/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll +++ b/llvm/test/Transforms/SimplifyCFG/ARM/switch-to-lookup-table.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=static < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=pic < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE ; RUN: opt -S -simplifycfg -switch-to-lookup -mtriple=arm -relocation-model=ropi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE @@ -23,22 +22,11 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7a--none-eabi" define i32 @test1(i32 %n) { -; CHECK-LABEL: @test1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3 -; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] -; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* @switch.table.test1, i32 0, i32 [[N]] -; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] -; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] -; CHECK: return: -; CHECK-NEXT: ret i32 15498 -; entry: switch i32 %n, label %sw.default [ - i32 0, label %sw.bb - i32 1, label %sw.bb1 - i32 2, label %sw.bb2 + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 ] sw.bb: @@ -65,39 +53,11 @@ return: define i32* @test2(i32 %n) { -; ENABLE-LABEL: @test2( -; ENABLE-NEXT: entry: -; ENABLE-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3 -; ENABLE-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] -; ENABLE: switch.lookup: -; ENABLE-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.test2, i32 0, i32 [[N]] -; ENABLE-NEXT: [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]] -; ENABLE-NEXT: ret i32* [[SWITCH_LOAD]] -; ENABLE: return: -; ENABLE-NEXT: ret i32* @c4 -; -; DISABLE-LABEL: @test2( -; DISABLE-NEXT: entry: -; DISABLE-NEXT: switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [ -; DISABLE-NEXT: i32 0, label [[RETURN:%.*]] -; DISABLE-NEXT: i32 1, label [[SW_BB1:%.*]] -; DISABLE-NEXT: i32 2, label [[SW_BB2:%.*]] -; DISABLE-NEXT: ] -; DISABLE: sw.bb1: -; DISABLE-NEXT: br label [[RETURN]] -; DISABLE: sw.bb2: -; DISABLE-NEXT: br label [[RETURN]] -; DISABLE: sw.default: -; DISABLE-NEXT: br label [[RETURN]] -; DISABLE: return: -; DISABLE-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @c4, [[SW_DEFAULT]] ], [ @c3, [[SW_BB2]] ], [ @c2, [[SW_BB1]] ], [ @c1, [[ENTRY:%.*]] ] -; DISABLE-NEXT: ret i32* [[RETVAL_0]] -; entry: switch i32 %n, label %sw.default [ - i32 0, label %sw.bb - i32 1, label %sw.bb1 - i32 2, label %sw.bb2 + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 ] sw.bb: @@ -123,39 +83,11 @@ return: @g4 = external global i32, align 4 define i32* @test3(i32 %n) { -; ENABLE-LABEL: @test3( -; ENABLE-NEXT: entry: -; ENABLE-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N:%.*]], 3 -; ENABLE-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] -; ENABLE: switch.lookup: -; ENABLE-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* @switch.table.test3, i32 0, i32 [[N]] -; ENABLE-NEXT: [[SWITCH_LOAD:%.*]] = load i32*, i32** [[SWITCH_GEP]] -; ENABLE-NEXT: ret i32* [[SWITCH_LOAD]] -; ENABLE: return: -; ENABLE-NEXT: ret i32* @g4 -; -; DISABLE-LABEL: @test3( -; DISABLE-NEXT: entry: -; DISABLE-NEXT: switch i32 [[N:%.*]], label [[SW_DEFAULT:%.*]] [ -; DISABLE-NEXT: i32 0, label [[RETURN:%.*]] -; DISABLE-NEXT: i32 1, label [[SW_BB1:%.*]] -; DISABLE-NEXT: i32 2, label [[SW_BB2:%.*]] -; DISABLE-NEXT: ] -; DISABLE: sw.bb1: -; DISABLE-NEXT: br label [[RETURN]] -; DISABLE: sw.bb2: -; DISABLE-NEXT: br label [[RETURN]] -; DISABLE: sw.default: -; DISABLE-NEXT: br label [[RETURN]] -; DISABLE: return: -; DISABLE-NEXT: [[RETVAL_0:%.*]] = phi i32* [ @g4, [[SW_DEFAULT]] ], [ @g3, [[SW_BB2]] ], [ @g2, [[SW_BB1]] ], [ @g1, [[ENTRY:%.*]] ] -; DISABLE-NEXT: ret i32* [[RETVAL_0]] -; entry: switch i32 %n, label %sw.default [ - i32 0, label %sw.bb - i32 1, label %sw.bb1 - i32 2, label %sw.bb2 + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 ] sw.bb: @@ -182,26 +114,6 @@ declare i32 @f4(i32, i32) declare i32 @f5(i32, i32) define i32 @test4(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @test4( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 [[A:%.*]], label [[COND_FALSE6:%.*]] [ -; CHECK-NEXT: i32 1, label [[COND_END11:%.*]] -; CHECK-NEXT: i32 2, label [[COND_END11_FOLD_SPLIT:%.*]] -; CHECK-NEXT: i32 3, label [[COND_END11_FOLD_SPLIT1:%.*]] -; CHECK-NEXT: ] -; CHECK: cond.false6: -; CHECK-NEXT: [[CMP7:%.*]] = icmp eq i32 [[A]], 4 -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP7]], i32 (i32, i32)* @f4, i32 (i32, i32)* @f5 -; CHECK-NEXT: br label [[COND_END11]] -; CHECK: cond.end11.fold.split: -; CHECK-NEXT: br label [[COND_END11]] -; CHECK: cond.end11.fold.split1: -; CHECK-NEXT: br label [[COND_END11]] -; CHECK: cond.end11: -; CHECK-NEXT: [[COND12:%.*]] = phi i32 (i32, i32)* [ @f1, [[ENTRY:%.*]] ], [ [[COND]], [[COND_FALSE6]] ], [ @f2, [[COND_END11_FOLD_SPLIT]] ], [ @f3, [[COND_END11_FOLD_SPLIT1]] ] -; CHECK-NEXT: [[CALL:%.*]] = call i32 [[COND12]](i32 [[B:%.*]], i32 [[C:%.*]]) -; CHECK-NEXT: ret i32 [[CALL]] -; entry: %cmp = icmp eq i32 %a, 1 br i1 %cmp, label %cond.end11, label %cond.false diff --git a/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll b/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll index 79a6d01d0f3f..e558956d5026 100644 --- a/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll +++ b/llvm/test/Transforms/SimplifyCFG/CoveredLookupTable.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -simplifycfg -switch-to-lookup -S %s | FileCheck %s ; RUN: opt -passes='simplify-cfg' -S %s | FileCheck %s ; rdar://15268442 @@ -6,28 +5,24 @@ target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" +; CHECK-LABEL: define i3 @coveredswitch_test( +; CHECK: entry: +; CHECK-NEXT: sub i3 %input, -4 +; CHECK-NEXT: zext i3 %switch.tableidx to i24 +; CHECK-NEXT: mul i24 %switch.cast, 3 +; CHECK-NEXT: lshr i24 7507338, %switch.shiftamt +; CHECK-NEXT: trunc i24 %switch.downshift to i3 +; CHECK-NEXT: ret i3 %switch.masked + define i3 @coveredswitch_test(i3 %input) { -; CHECK-LABEL: @coveredswitch_test( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[INPUT:%.*]], -2 -; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[BB8:%.*]] -; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_CAST:%.*]] = zext i3 [[INPUT]] to i18 -; CHECK-NEXT: [[SWITCH_SHIFTAMT:%.*]] = mul i18 [[SWITCH_CAST]], 3 -; CHECK-NEXT: [[SWITCH_DOWNSHIFT:%.*]] = lshr i18 42792, [[SWITCH_SHIFTAMT]] -; CHECK-NEXT: [[SWITCH_MASKED:%.*]] = trunc i18 [[SWITCH_DOWNSHIFT]] to i3 -; CHECK-NEXT: ret i3 [[SWITCH_MASKED]] -; CHECK: bb8: -; CHECK-NEXT: ret i3 -2 -; entry: switch i3 %input, label %bb8 [ - i3 0, label %bb7 - i3 1, label %bb - i3 2, label %bb3 - i3 3, label %bb4 - i3 4, label %bb5 - i3 5, label %bb6 + i3 0, label %bb7 + i3 1, label %bb + i3 2, label %bb3 + i3 3, label %bb4 + i3 4, label %bb5 + i3 5, label %bb6 ] bb: ; preds = %entry diff --git a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll index 05e5e8639374..a8758a789ec4 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/disable-lookup-table.ll @@ -1,38 +1,19 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -simplifycfg -switch-to-lookup -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s ; RUN: opt < %s -passes='simplify-cfg' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; In the presence of "-no-jump-tables"="true", simplifycfg should not convert switches to lookup tables. +; CHECK: @switch.table.bar = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1] +; CHECK-LABEL: foo +; CHECK-NOT: @switch.table.foo = private unnamed_addr constant [4 x i32] [i32 55, i32 123, i32 0, i32 -1] + define i32 @foo(i32 %c) "no-jump-tables"="true" { -; CHECK-LABEL: @foo( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: switch i32 [[TMP0]], label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 0, label [[RETURN:%.*]] -; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]] -; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]] -; CHECK-NEXT: i32 3, label [[SW_BB3:%.*]] -; CHECK-NEXT: ] -; CHECK: sw.bb1: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.bb2: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.bb3: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: sw.default: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 15, [[SW_DEFAULT]] ], [ -1, [[SW_BB3]] ], [ 0, [[SW_BB2]] ], [ 123, [[SW_BB1]] ], [ 55, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[RETVAL_0]] -; entry: switch i32 %c, label %sw.default [ - i32 42, label %return - i32 43, label %sw.bb1 - i32 44, label %sw.bb2 - i32 45, label %sw.bb3 + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 ] sw.bb1: br label %return @@ -46,24 +27,12 @@ return: define i32 @bar(i32 %c) { -; CHECK-LABEL: @bar( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[C:%.*]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[TMP1]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN:%.*]] -; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table.bar, i32 0, i32 [[TMP0]] -; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, i32* [[SWITCH_GEP]] -; CHECK-NEXT: ret i32 [[SWITCH_LOAD]] -; CHECK: return: -; CHECK-NEXT: ret i32 15 -; entry: switch i32 %c, label %sw.default [ - i32 42, label %return - i32 43, label %sw.bb1 - i32 44, label %sw.bb2 - i32 45, label %sw.bb3 + i32 42, label %return + i32 43, label %sw.bb1 + i32 44, label %sw.bb2 + i32 45, label %sw.bb3 ] sw.bb1: br label %return diff --git a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll index 11ba3984f35d..bee80e6acce0 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/speculate-cttz-ctlz.ll @@ -1,31 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=ALL --check-prefix=BMI ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown -mattr=+lzcnt < %s | FileCheck %s --check-prefix=ALL --check-prefix=LZCNT ; RUN: opt -S -simplifycfg -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC define i64 @test1(i64 %A) { -; BMI-LABEL: @test1( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true) -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]] -; BMI-NEXT: ret i64 [[COND]] -; -; LZCNT-LABEL: @test1( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true) -; LZCNT-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]] -; LZCNT-NEXT: ret i64 [[SPEC_SELECT]] -; -; GENERIC-LABEL: @test1( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0 -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[A]], i1 true) -; GENERIC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]] -; GENERIC-NEXT: ret i64 [[COND]] -; +; ALL-LABEL: @test1( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true) +; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTLZ]] +; ALL: ret entry: %tobool = icmp eq i64 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -40,27 +23,11 @@ cond.end: ; preds = %entry, %cond.true } define i32 @test2(i32 %A) { -; BMI-LABEL: @test2( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true) -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; BMI-NEXT: ret i32 [[COND]] -; -; LZCNT-LABEL: @test2( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true) -; LZCNT-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; LZCNT-NEXT: ret i32 [[SPEC_SELECT]] -; -; GENERIC-LABEL: @test2( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[A]], i1 true) -; GENERIC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; GENERIC-NEXT: ret i32 [[COND]] -; +; ALL-LABEL: @test2( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true) +; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTLZ]] +; ALL: ret entry: %tobool = icmp eq i32 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -76,27 +43,11 @@ cond.end: ; preds = %entry, %cond.true define signext i16 @test3(i16 signext %A) { -; BMI-LABEL: @test3( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true) -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test3( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true) -; LZCNT-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]] -; LZCNT-NEXT: ret i16 [[SPEC_SELECT]] -; -; GENERIC-LABEL: @test3( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0 -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.ctlz.i16(i16 [[A]], i1 true) -; GENERIC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]] -; GENERIC-NEXT: ret i16 [[COND]] -; +; ALL-LABEL: @test3( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true) +; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTLZ]] +; ALL: ret entry: %tobool = icmp eq i16 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -112,27 +63,11 @@ cond.end: ; preds = %entry, %cond.true define i64 @test1b(i64 %A) { -; BMI-LABEL: @test1b( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true) -; BMI-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]] -; BMI-NEXT: ret i64 [[SPEC_SELECT]] -; -; LZCNT-LABEL: @test1b( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true) -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]] -; LZCNT-NEXT: ret i64 [[COND]] -; -; GENERIC-LABEL: @test1b( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[A:%.*]], 0 -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[A]], i1 true) -; GENERIC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 64, i64 [[TMP0]] -; GENERIC-NEXT: ret i64 [[COND]] -; +; ALL-LABEL: @test1b( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %A, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %A, i1 true) +; ALL-NEXT: select i1 [[COND]], i64 64, i64 [[CTTZ]] +; ALL: ret entry: %tobool = icmp eq i64 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -148,27 +83,11 @@ cond.end: ; preds = %entry, %cond.true define i32 @test2b(i32 %A) { -; BMI-LABEL: @test2b( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true) -; BMI-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; BMI-NEXT: ret i32 [[SPEC_SELECT]] -; -; LZCNT-LABEL: @test2b( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true) -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; LZCNT-NEXT: ret i32 [[COND]] -; -; GENERIC-LABEL: @test2b( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[A:%.*]], 0 -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[A]], i1 true) -; GENERIC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 32, i32 [[TMP0]] -; GENERIC-NEXT: ret i32 [[COND]] -; +; ALL-LABEL: @test2b( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %A, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %A, i1 true) +; ALL-NEXT: select i1 [[COND]], i32 32, i32 [[CTTZ]] +; ALL: ret entry: %tobool = icmp eq i32 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -184,27 +103,11 @@ cond.end: ; preds = %entry, %cond.true define signext i16 @test3b(i16 signext %A) { -; BMI-LABEL: @test3b( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true) -; BMI-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]] -; BMI-NEXT: ret i16 [[SPEC_SELECT]] -; -; LZCNT-LABEL: @test3b( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true) -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test3b( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i16 [[A:%.*]], 0 -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.cttz.i16(i16 [[A]], i1 true) -; GENERIC-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 16, i16 [[TMP0]] -; GENERIC-NEXT: ret i16 [[COND]] -; +; ALL-LABEL: @test3b( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i16 %A, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i16 @llvm.cttz.i16(i16 %A, i1 true) +; ALL-NEXT: select i1 [[COND]], i16 16, i16 [[CTTZ]] +; ALL: ret entry: %tobool = icmp eq i16 %A, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -223,38 +126,14 @@ cond.end: ; preds = %entry, %cond.true ; for the target. define i64 @test1e(i32 %x) { -; BMI-LABEL: @test1e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]] -; BMI-NEXT: ret i64 [[COND]] -; -; LZCNT-LABEL: @test1e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i64 [[COND]] -; -; GENERIC-LABEL: @test1e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i64 [[COND]] -; +; ALL-LABEL: @test1e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) +; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTTZ]] to i64 +; BMI-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]] +; LZCNT-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -270,38 +149,14 @@ cond.end: ; preds = %entry, %cond.true } define i32 @test2e(i64 %x) { -; BMI-LABEL: @test2e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] -; BMI-NEXT: ret i32 [[COND]] -; -; LZCNT-LABEL: @test2e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i32 [[COND]] -; -; GENERIC-LABEL: @test2e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i32 [[COND]] -; +; ALL-LABEL: @test2e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) +; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i32 +; BMI-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]] +; LZCNT-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i64 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -317,38 +172,14 @@ cond.end: ; preds = %entry, %cond.true } define i64 @test3e(i32 %x) { -; BMI-LABEL: @test3e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i64 [[COND]] -; -; LZCNT-LABEL: @test3e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i64 32, i64 [[PHITMP2]] -; LZCNT-NEXT: ret i64 [[COND]] -; -; GENERIC-LABEL: @test3e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[PHITMP2:%.*]] = zext i32 [[TMP0]] to i64 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i64 [ [[PHITMP2]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i64 [[COND]] -; +; ALL-LABEL: @test3e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) +; ALL: [[ZEXT:%[A-Za-z0-9]+]] = zext i32 [[CTLZ]] to i64 +; LZCNT-NEXT: select i1 [[COND]], i64 32, i64 [[ZEXT]] +; BMI-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -364,38 +195,14 @@ cond.end: ; preds = %entry, %cond.true } define i32 @test4e(i64 %x) { -; BMI-LABEL: @test4e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i32 [[COND]] -; -; LZCNT-LABEL: @test4e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i32 64, i32 [[CAST]] -; LZCNT-NEXT: ret i32 [[COND]] -; -; GENERIC-LABEL: @test4e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i32 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i32 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i32 [[COND]] -; +; ALL-LABEL: @test4e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) +; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i32 +; LZCNT-NEXT: select i1 [[COND]], i32 64, i32 [[TRUNC]] +; BMI-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i64 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -411,38 +218,14 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test5e(i64 %x) { -; BMI-LABEL: @test5e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test5e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test5e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] -; +; ALL-LABEL: @test5e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true) +; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTLZ]] to i16 +; LZCNT-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]] +; BMI-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i64 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -458,38 +241,14 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test6e(i32 %x) { -; BMI-LABEL: @test6e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; BMI: cond.true: -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; BMI-NEXT: br label [[COND_END]] -; BMI: cond.end: -; BMI-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test6e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; LZCNT-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test6e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] -; +; ALL-LABEL: @test6e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0 +; ALL: [[CTLZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) +; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTLZ]] to i16 +; LZCNT-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]] +; BMI-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -505,38 +264,14 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test7e(i64 %x) { -; BMI-LABEL: @test7e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 64, i16 [[CAST]] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test7e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test7e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.cttz.i64(i64 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i64 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 64, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] -; +; ALL-LABEL: @test7e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i64 %x, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i64 @llvm.cttz.i64(i64 %x, i1 true) +; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i64 [[CTTZ]] to i16 +; BMI-NEXT: select i1 [[COND]], i16 64, i16 [[TRUNC]] +; LZCNT-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i64 %x, 0 br i1 %tobool, label %cond.end, label %cond.true @@ -552,38 +287,14 @@ cond.end: ; preds = %entry, %cond.true } define i16 @test8e(i32 %x) { -; BMI-LABEL: @test8e( -; BMI-NEXT: entry: -; BMI-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; BMI-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; BMI-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; BMI-NEXT: [[COND:%.*]] = select i1 [[TOBOOL]], i16 32, i16 [[CAST]] -; BMI-NEXT: ret i16 [[COND]] -; -; LZCNT-LABEL: @test8e( -; LZCNT-NEXT: entry: -; LZCNT-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; LZCNT-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; LZCNT: cond.true: -; LZCNT-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; LZCNT-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; LZCNT-NEXT: br label [[COND_END]] -; LZCNT: cond.end: -; LZCNT-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; LZCNT-NEXT: ret i16 [[COND]] -; -; GENERIC-LABEL: @test8e( -; GENERIC-NEXT: entry: -; GENERIC-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[X:%.*]], 0 -; GENERIC-NEXT: br i1 [[TOBOOL]], label [[COND_END:%.*]], label [[COND_TRUE:%.*]] -; GENERIC: cond.true: -; GENERIC-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X]], i1 true) -; GENERIC-NEXT: [[CAST:%.*]] = trunc i32 [[TMP0]] to i16 -; GENERIC-NEXT: br label [[COND_END]] -; GENERIC: cond.end: -; GENERIC-NEXT: [[COND:%.*]] = phi i16 [ [[CAST]], [[COND_TRUE]] ], [ 32, [[ENTRY:%.*]] ] -; GENERIC-NEXT: ret i16 [[COND]] -; +; ALL-LABEL: @test8e( +; ALL: [[COND:%[A-Za-z0-9]+]] = icmp eq i32 %x, 0 +; ALL: [[CTTZ:%[A-Za-z0-9]+]] = tail call i32 @llvm.cttz.i32(i32 %x, i1 true) +; ALL: [[TRUNC:%[A-Za-z0-9]+]] = trunc i32 [[CTTZ]] to i16 +; BMI-NEXT: select i1 [[COND]], i16 32, i16 [[TRUNC]] +; LZCNT-NOT: select +; GENERIC-NOT: select +; ALL: ret entry: %tobool = icmp eq i32 %x, 0 br i1 %tobool, label %cond.end, label %cond.true diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll index 08d266c7c912..c42568ffa935 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s ; RUN: opt -S -passes='simplify-cfg' < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s @@ -6,29 +5,25 @@ target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" +; When we have a covered lookup table, make sure we don't delete PHINodes that +; are cached in PHIs. +; CHECK-LABEL: @test +; CHECK: entry: +; CHECK-NEXT: sub i3 %arg, -4 +; CHECK-NEXT: zext i3 %switch.tableidx to i4 +; CHECK-NEXT: getelementptr inbounds [8 x i64], [8 x i64]* @switch.table.test, i32 0, i4 %switch.tableidx.zext +; CHECK-NEXT: load i64, i64* %switch.gep +; CHECK-NEXT: add i64 +; CHECK-NEXT: ret i64 define i64 @test(i3 %arg) { -; CHECK-LABEL: @test( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i3 [[ARG:%.*]], -1 -; CHECK-NEXT: br i1 [[TMP0]], label [[SWITCH_LOOKUP:%.*]], label [[DEFAULT:%.*]] -; CHECK: switch.lookup: -; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i3 [[ARG]] to i4 -; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i64], [7 x i64]* @switch.table.test, i32 0, i4 [[SWITCH_TABLEIDX_ZEXT]] -; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i64, i64* [[SWITCH_GEP]] -; CHECK-NEXT: br label [[DEFAULT]] -; CHECK: Default: -; CHECK-NEXT: [[V1:%.*]] = phi i64 [ 8, [[ENTRY:%.*]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ] -; CHECK-NEXT: [[V3:%.*]] = add i64 [[V1]], 0 -; CHECK-NEXT: ret i64 [[V3]] -; entry: switch i3 %arg, label %Default [ - i3 -2, label %Label6 - i3 1, label %Label1 - i3 2, label %Label2 - i3 3, label %Label3 - i3 -4, label %Label4 - i3 -3, label %Label5 + i3 -2, label %Label6 + i3 1, label %Label1 + i3 2, label %Label2 + i3 3, label %Label3 + i3 -4, label %Label4 + i3 -3, label %Label5 ] Default: diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll index 5d2297f58d40..0b9d6ebe8258 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -simplifycfg -switch-to-lookup < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s ; RUN: opt -S -passes='simplify-cfg' < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s @@ -6,18 +5,21 @@ target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin12.0.0" +; When tableindex can't fit into i2, we should extend the type to i3. +; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si +; CHECK: entry: +; CHECK-NEXT: sub i2 %0, -2 +; CHECK-NEXT: zext i2 %switch.tableidx to i3 +; CHECK-NEXT: getelementptr inbounds [4 x i64], [4 x i64]* @switch.table._TFO6reduce1E5toRawfS0_FT_Si, i32 0, i3 %switch.tableidx.zext +; CHECK-NEXT: load i64, i64* %switch.gep +; CHECK-NEXT: ret i64 %switch.load define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) { -; CHECK-LABEL: @_TFO6reduce1E5toRawfS0_FT_Si( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SWITCH_IDX_CAST:%.*]] = zext i2 [[TMP0:%.*]] to i64 -; CHECK-NEXT: ret i64 [[SWITCH_IDX_CAST]] -; entry: switch i2 %0, label %1 [ - i2 0, label %2 - i2 1, label %3 - i2 -2, label %4 - i2 -1, label %5 + i2 0, label %2 + i2 1, label %3 + i2 -2, label %4 + i2 -1, label %5 ] ;