diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 650a6608e4e0..29e903e49f70 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2569,6 +2569,73 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
   return nullptr;
 }
 
+// Match the following IR pattern:
+//   %x.lowbits = and i8 %x, %lowbitmask
+//   %x.lowbits.are.zero = icmp eq i8 %x.lowbits, 0
+//   %x.biased = add i8 %x, %bias
+//   %x.biased.highbits = and i8 %x.biased, %highbitmask
+//   %x.roundedup = select i1 %x.lowbits.are.zero, i8 %x, i8 %x.biased.highbits
+// Define:
+//   %alignment = add i8 %lowbitmask, 1
+// Iff 1. %alignment is a power of two (aka, %lowbitmask is a low-bit mask)
+// and 2. %bias is equal to either %lowbitmask or %alignment,
+// and 3. %highbitmask is equal to ~%lowbitmask (aka, to -%alignment),
+// then this pattern can be transformed into:
+//   %x.offset = add i8 %x, %lowbitmask
+//   %x.roundedup = and i8 %x.offset, %highbitmask
+static Instruction *
+foldRoundUpIntegerWithPow2Alignment(SelectInst &SI,
+                                    InstCombiner::BuilderTy &Builder) {
+  Value *Cond = SI.getCondition();
+  Value *X = SI.getTrueValue();
+  Value *XBiasedHighBits = SI.getFalseValue();
+
+  ICmpInst::Predicate Pred;
+  Value *XLowBits;
+  if (!match(Cond, m_ICmp(Pred, m_Value(XLowBits), m_ZeroInt())) ||
+      !ICmpInst::isEquality(Pred))
+    return nullptr;
+
+  if (Pred == ICmpInst::Predicate::ICMP_NE)
+    std::swap(X, XBiasedHighBits);
+
+  // FIXME: if BiasCst is equal to LowBitMaskCst,
+  // we could just return XBiasedHighBits.
+  if (!XBiasedHighBits->hasOneUse())
+    return nullptr;
+
+  // FIXME: we could support non-splats here.
+
+  const APInt *LowBitMaskCst;
+  if (!match(XLowBits, m_And(m_Specific(X), m_APIntAllowUndef(LowBitMaskCst))))
+    return nullptr;
+
+  const APInt *BiasCst, *HighBitMaskCst;
+  if (!match(XBiasedHighBits,
+             m_And(m_Add(m_Specific(X), m_APIntAllowUndef(BiasCst)),
+                   m_APIntAllowUndef(HighBitMaskCst))))
+    return nullptr;
+
+  if (!LowBitMaskCst->isMask())
+    return nullptr;
+
+  APInt InvertedLowBitMaskCst = ~*LowBitMaskCst;
+  if (InvertedLowBitMaskCst != *HighBitMaskCst)
+    return nullptr;
+
+  APInt AlignmentCst = *LowBitMaskCst + 1;
+
+  if (*BiasCst != AlignmentCst && *BiasCst != *LowBitMaskCst)
+    return nullptr;
+
+  // FIXME: could we preserve undefs here?
+  Type *Ty = X->getType();
+  Value *XOffset = Builder.CreateAdd(X, ConstantInt::get(Ty, *LowBitMaskCst),
+                                     X->getName() + ".offset");
+  return BinaryOperator::CreateAnd(XOffset,
+                                   ConstantInt::get(Ty, *HighBitMaskCst));
+}
+
 Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -3114,6 +3181,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
   if (Value *Fr = foldSelectWithFrozenICmp(SI, Builder))
     return replaceInstUsesWith(SI, Fr);
 
+  if (Instruction *I = foldRoundUpIntegerWithPow2Alignment(SI, Builder))
+    return I;
+
   // select(mask, mload(,,mask,0), 0) -> mload(,,mask,0)
   // Load inst is intentionally not checked for hasOneUse()
   if (match(FalseVal, m_Zero()) &&
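The fold rests on the identity that, for a power-of-two alignment, `(x + (alignment - 1)) & -alignment` agrees with the original select form for either permitted bias. As a quick sanity check of that identity, here is a standalone sketch (illustration only, not part of the patch; the function names are invented) that brute-forces the equivalence over all i8 values for the @t0 constants below (lowbitmask = 15, bias = 16, highbitmask = -16):

```c++
#include <cassert>
#include <cstdint>

// Select form, mirroring @t0 in the test diff below: lowbitmask = 15,
// bias = 16, highbitmask = -16 (0xF0). uint8_t arithmetic wraps modulo
// 256, matching the IR's i8 add.
static std::uint8_t roundUpSelectForm(std::uint8_t X) {
  std::uint8_t XLowBits = X & 15;                // %x.lowbits
  bool XLowBitsAreZero = XLowBits == 0;          // %x.lowbits.are.zero
  std::uint8_t XBiased = X + 16;                 // %x.biased
  std::uint8_t XBiasedHighBits = XBiased & 0xF0; // %x.biased.highbits
  return XLowBitsAreZero ? X : XBiasedHighBits;  // %x.roundedup
}

// Folded form produced by the transform: a single add of the low-bit
// mask, then a mask of the high bits.
static std::uint8_t roundUpFoldedForm(std::uint8_t X) {
  std::uint8_t XOffset = X + 15; // %x.offset
  return XOffset & 0xF0;         // %x.roundedup
}

int main() {
  // Brute-force the equivalence over every i8 value.
  for (unsigned V = 0; V <= 255; ++V)
    assert(roundUpSelectForm(V) == roundUpFoldedForm(V));
  return 0;
}
```

The same loop also passes with a bias of 15 instead of 16, which is why the fold accepts either %lowbitmask or %alignment as the bias.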
diff --git a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
index eeeb9fb1e622..d29a238a40b7 100644
--- a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
+++ b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
@@ -10,11 +10,8 @@ declare void @llvm.assume(i1)
 ; Basic pattern
 define i8 @t0(i8 %x) {
 ; CHECK-LABEL: @t0(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 15
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add i8 [[X]], 16
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add i8 [[X:%.*]], 15
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and i8 [[X_OFFSET]], -16
 ; CHECK-NEXT:    ret i8 [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and i8 %x, 15
@@ -28,11 +25,8 @@ define i8 @t0(i8 %x) {
 ; Another alignment is fine
 define i8 @t1(i8 %x) {
 ; CHECK-LABEL: @t1(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 31
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add i8 [[X]], 32
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -32
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add i8 [[X:%.*]], 31
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and i8 [[X_OFFSET]], -32
 ; CHECK-NEXT:    ret i8 [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and i8 %x, 31
@@ -46,11 +40,8 @@ define i8 @t1(i8 %x) {
 ; Bias can be either the alignment or alignment-1
 define i8 @t2(i8 %x) {
 ; CHECK-LABEL: @t2(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 15
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq i8 [[X_LOWBITS]], 0
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add i8 [[X]], 15
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_ZERO]], i8 [[X]], i8 [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add i8 [[X:%.*]], 15
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and i8 [[X_OFFSET]], -16
 ; CHECK-NEXT:    ret i8 [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and i8 %x, 15
@@ -67,9 +58,8 @@ define i8 @t3_commutative(i8 %x) {
 ; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and i8 [[X:%.*]], 15
 ; CHECK-NEXT:    [[X_LOWBITS_ARE_NOT_ZERO:%.*]] = icmp ne i8 [[X_LOWBITS]], 0
 ; CHECK-NEXT:    call void @use.i1(i1 [[X_LOWBITS_ARE_NOT_ZERO]])
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add i8 [[X]], 16
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and i8 [[X_BIASED]], -16
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select i1 [[X_LOWBITS_ARE_NOT_ZERO]], i8 [[X_BIASED_HIGHBITS]], i8 [[X]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add i8 [[X]], 15
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and i8 [[X_OFFSET]], -16
 ; CHECK-NEXT:    ret i8 [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and i8 %x, 15
@@ -84,11 +74,8 @@ define i8 @t3_commutative(i8 %x) {
 ; Basic splat vector test
 define <2 x i8> @t4_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t4_splat(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 16, i8 16>
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16>
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15>
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_OFFSET]], <i8 -16, i8 -16>
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x, <i8 15, i8 15>
@@ -102,11 +89,8 @@ define <2 x i8> @t4_splat(<2 x i8> %x) {
 ; Splat-with-undef
 define <2 x i8> @t5_splat_undef_0b0001(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_undef_0b0001(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X]],
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]],
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_OFFSET]],
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x,
@@ -118,11 +102,8 @@ define <2 x i8> @t5_splat_undef_0b0001(<2 x i8> %x) {
 }
 define <2 x i8> @t5_splat_undef_0b0010(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_undef_0b0010(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X]],
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]],
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_OFFSET]],
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x,
@@ -134,11 +115,8 @@ define <2 x i8> @t5_splat_undef_0b0010(<2 x i8> %x) {
 }
 define <2 x i8> @t5_splat_undef_0b0100(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_undef_0b0100(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]],
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X]],
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]],
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_OFFSET]],
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x,
@@ -150,11 +128,8 @@ define <2 x i8> @t5_splat_undef_0b0100(<2 x i8> %x) {
 }
 define <2 x i8> @t5_splat_undef_0b1000(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_undef_0b1000(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]],
-; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X]],
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]],
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]]
+; CHECK-NEXT:    [[X_OFFSET:%.*]] = add <2 x i8> [[X:%.*]],
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_OFFSET]],
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x,
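As an aside, the first FIXME in the fold (bias equal to %lowbitmask) can be sanity-checked the same way: with that bias, %x.biased.highbits already equals the rounded-up result for every input, including exact multiples of the alignment, so the select is redundant even when %x.biased.highbits has other uses. A minimal sketch, assuming the same i8 constants as @t2 (illustration only, not part of the patch):

```c++
#include <cassert>
#include <cstdint>

int main() {
  // Mirrors @t2 above: lowbitmask = 15, bias = 15, highbitmask = -16 (0xF0).
  // With the bias equal to the low-bit mask, the masked biased value is
  // already the rounded-up result for multiples of 16 as well, so the
  // select contributes nothing.
  for (unsigned V = 0; V <= 255; ++V) {
    std::uint8_t X = V;
    std::uint8_t XBiasedHighBits = (X + 15) & 0xF0; // %x.biased.highbits
    std::uint8_t XRoundedUp =
        (X & 15) == 0 ? X : XBiasedHighBits;        // %x.roundedup
    assert(XBiasedHighBits == XRoundedUp);
  }
  return 0;
}
```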