[InstCombine] Support vector splats in foldSelectICmpAnd.

Unfortunately, it looks like there are some other missed optimizations in the generated code for some of these cases. I'll try to look at some of those next.

llvm-svn: 310184
This commit is contained in:
Craig Topper 2017-08-05 20:00:41 +00:00
parent cc2294a4eb
commit 1bbcab9ca5
2 changed files with 82 additions and 15 deletions

View File

@ -597,18 +597,24 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
/// icmp instruction with zero, and we have an 'and' with the non-constant value
/// and a power of two we can turn the select into a shift on the result of the
/// 'and'.
static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC,
static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC,
APInt TrueVal, APInt FalseVal,
InstCombiner::BuilderTy &Builder) {
if (!IC->isEquality() || !SI.getType()->isIntegerTy())
assert(SelType->isIntOrIntVectorTy() && "Not an integer select?");
// If this is a vector select, we need a vector compare.
if (SelType->isVectorTy() != IC->getType()->isVectorTy())
return nullptr;
if (!IC->isEquality())
return nullptr;
if (!match(IC->getOperand(1), m_Zero()))
return nullptr;
ConstantInt *AndRHS;
const APInt *AndRHS;
Value *LHS = IC->getOperand(0);
if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
if (!match(LHS, m_And(m_Value(), m_Power2(AndRHS))))
return nullptr;
// If both select arms are non-zero see if we have a select of the form
@ -628,28 +634,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC,
FalseVal -= Offset;
}
// Make sure the mask in the 'and' and one of the select arms is a power of 2.
if (!AndRHS->getValue().isPowerOf2() ||
(!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()))
// Make sure one of the select arms is a power of 2.
if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())
return nullptr;
// Determine which shift is needed to transform result of the 'and' into the
// desired result.
const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal;
unsigned ValZeros = ValC.logBase2();
unsigned AndZeros = AndRHS->getValue().logBase2();
unsigned AndZeros = AndRHS->logBase2();
// If types don't match we can still convert the select by introducing a zext
// or a trunc of the 'and'.
Value *V = LHS;
if (ValZeros > AndZeros) {
V = Builder.CreateZExtOrTrunc(V, SI.getType());
V = Builder.CreateZExtOrTrunc(V, SelType);
V = Builder.CreateShl(V, ValZeros - AndZeros);
} else if (ValZeros < AndZeros) {
V = Builder.CreateLShr(V, AndZeros - ValZeros);
V = Builder.CreateZExtOrTrunc(V, SI.getType());
V = Builder.CreateZExtOrTrunc(V, SelType);
} else
V = Builder.CreateZExtOrTrunc(V, SI.getType());
V = Builder.CreateZExtOrTrunc(V, SelType);
// Okay, now we know that everything is set up, we just don't know whether we
// have a icmp_ne or icmp_eq and whether the true or false val is the zero.
@ -670,11 +675,14 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal))
if (Value *V = foldSelectICmpAnd(SI, ICI, TrueValC->getValue(),
FalseValC->getValue(), Builder))
{
const APInt *TrueValC, *FalseValC;
if (match(TrueVal, m_APInt(TrueValC)) &&
match(FalseVal, m_APInt(FalseValC)))
if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *TrueValC,
*FalseValC, Builder))
return replaceInstUsesWith(SI, V);
}
if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
return NewSel;

View File

@ -268,6 +268,21 @@ define i32 @test65(i64 %x) {
ret i32 %3
}
; Splat-vector form of a select on a single-bit mask test (appears to mirror
; the scalar @test65, whose body is not shown here). Each i64 lane is masked
; with 16 and compared against zero; the select yields splat 40 when the bit
; is set and splat 42 otherwise.
; The expected output has no compare/select: the masked bit is shifted right
; by 3 (16 -> 2), truncated to the i32 lanes, then or'ed with 40 and xor'ed
; with 2, which gives 40 when the bit was set and 42 when it was clear.
define <2 x i32> @test65vec(<2 x i64> %x) {
; CHECK-LABEL: @test65vec(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 16, i64 16>
; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], <i64 3, i64 3>
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], <i32 40, i32 40>
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 2, i32 2>
; CHECK-NEXT: ret <2 x i32> [[TMP5]]
;
%1 = and <2 x i64> %x, <i64 16, i64 16>
%2 = icmp ne <2 x i64> %1, zeroinitializer
%3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
ret <2 x i32> %3
}
define i32 @test66(i64 %x) {
; CHECK-LABEL: @test66(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 31
@ -282,6 +297,35 @@ define i32 @test66(i64 %x) {
ret i32 %3
}
; Splat-vector select on a single-bit mask test where the tested bit
; (4294967296 == 1 << 32) lies above the width of the i32 result lanes
; (appears to mirror the scalar @test66, whose body is not shown here).
; The expected output shifts the masked value right by 31 (bit 32 -> bit 1)
; while it is still i64, and only then truncates to i32, so the bit is not
; lost by the truncation. The trailing or 40 / xor 2 rebuilds the 40/42 result.
define <2 x i32> @test66vec(<2 x i64> %x) {
; CHECK-LABEL: @test66vec(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 4294967296, i64 4294967296>
; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], <i64 31, i64 31>
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], <i32 40, i32 40>
; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 2, i32 2>
; CHECK-NEXT: ret <2 x i32> [[TMP5]]
;
%1 = and <2 x i64> %x, <i64 4294967296, i64 4294967296>
%2 = icmp ne <2 x i64> %1, zeroinitializer
%3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
ret <2 x i32> %3
}
; Make sure we don't try to optimize a scalar 'and' with a vector select.
; Here the condition is a scalar i1 computed from a scalar i64 mask test, while
; the selected values are <2 x i32> splats, so the masked scalar cannot be
; shifted/extended into the vector result. The expected output keeps the
; select; the only change is that the 'icmp ne' becomes 'icmp eq' with the two
; select arms swapped.
define <2 x i32> @test66vec_scalar_and(i64 %x) {
; CHECK-LABEL: @test66vec_scalar_and(
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%1 = and i64 %x, 4294967296
%2 = icmp ne i64 %1, 0
%3 = select i1 %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
ret <2 x i32> %3
}
define i32 @test67(i16 %x) {
; CHECK-LABEL: @test67(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 %x, 1
@ -296,6 +340,21 @@ define i32 @test67(i16 %x) {
ret i32 %3
}
; Splat-vector select on a single-bit mask test where the masked value is
; narrower (i16 lanes) than the selected result (i32 lanes) (appears to mirror
; the scalar @test67, whose body is not shown here).
; The expected output shifts the masked bit right by 1 (4 -> 2), applies the
; or 40 / xor 2 pair in i16, and zero-extends to <2 x i32> as the last step.
define <2 x i32> @test67vec(<2 x i16> %x) {
; CHECK-LABEL: @test67vec(
; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 4, i16 4>
; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i16> [[TMP1]], <i16 1, i16 1>
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i16> [[TMP2]], <i16 40, i16 40>
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP3]], <i16 2, i16 2>
; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[TMP5]]
;
%1 = and <2 x i16> %x, <i16 4, i16 4>
%2 = icmp ne <2 x i16> %1, zeroinitializer
%3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
ret <2 x i32> %3
}
define i32 @test68(i32 %x, i32 %y) {
; CHECK-LABEL: @test68(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6