[InstCombine] Support vector splats in transformZExtICmp

This patch adds splat support to transformZExtICmp. The test cases are vector versions of the scalar tests that failed when parts of the existing scalar code were commented out.

One vector test doesn't optimize properly because of a separate bug, so a TODO has been added.

Differential Revision: https://reviews.llvm.org/D37253

llvm-svn: 312023
This commit is contained in:
Craig Topper 2017-08-29 18:58:13 +00:00
parent e7becd7e85
commit 4431bfe88c
4 changed files with 93 additions and 7 deletions

View File

@ -772,13 +772,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// If we are just checking for a icmp eq of a single bit and zext'ing it // If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then // to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison. // cast to integer to avoid the comparison.
if (ConstantInt *Op1C = dyn_cast<ConstantInt>(ICI->getOperand(1))) { const APInt *Op1CV;
const APInt &Op1CV = Op1C->getValue(); if (match(ICI->getOperand(1), m_APInt(Op1CV))) {
// zext (x <s 0) to i32 --> x>>u31 true if signbit set. // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear. // zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV.isNullValue()) || if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
(ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) { (ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
if (!DoTransform) return ICI; if (!DoTransform) return ICI;
Value *In = ICI->getOperand(0); Value *In = ICI->getOperand(0);
@ -804,7 +804,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set. // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set. // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set. // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
if ((Op1CV.isNullValue() || Op1CV.isPowerOf2()) && if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
// This only works for EQ and NE // This only works for EQ and NE
ICI->isEquality()) { ICI->isEquality()) {
// If Op1C some other power of two, convert: // If Op1C some other power of two, convert:
@ -815,7 +815,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
if (!DoTransform) return ICI; if (!DoTransform) return ICI;
bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE; bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
if (!Op1CV.isNullValue() && (Op1CV != KnownZeroMask)) { if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false // (X&4) == 2 --> false
// (X&4) != 2 --> true // (X&4) != 2 --> true
Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()),
@ -833,7 +833,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
In->getName() + ".lobit"); In->getName() + ".lobit");
} }
if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1); Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder.CreateXor(In, One); In = Builder.CreateXor(In, One);
} }

View File

@ -48,6 +48,22 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
ret i32 %t3 ret i32 %t3
} }
; Vector (splat) counterpart of the scalar sign-bit comparison test: shifts the
; sign bits of %a and %b down to bit 0, compares them for equality, and
; zero-extends the <2 x i1> result.
; TODO: This should optimize but doesn't, due to missing vector support in
; InstCombiner::foldICmpEquality. The CHECK lines below therefore match the
; input instruction sequence unchanged.
define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
; CHECK-LABEL: @test3vec(
; CHECK-NEXT: [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
; CHECK-NEXT: [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
; CHECK-NEXT: ret <2 x i32> [[T3]]
;
%t0 = lshr <2 x i32> %a, <i32 31, i32 31>
%t1 = lshr <2 x i32> %b, <i32 31, i32 31>
%t2 = icmp eq <2 x i32> %t0, %t1
%t3 = zext <2 x i1> %t2 to <2 x i32>
ret <2 x i32> %t3
}
; Variation on @test3: checking the 2nd bit in a situation where the 5th bit ; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
; is one, not zero. ; is one, not zero.
define i32 @test3i(i32 %a, i32 %b) nounwind readnone { define i32 @test3i(i32 %a, i32 %b) nounwind readnone {

View File

@ -13,6 +13,16 @@ define i32 @test1(i32 %X) {
ret i32 %b ret i32 %b
} }
; Splat-vector sign-bit test: zext (X <s 0) is expected to fold to a single
; logical shift right by 31, which moves the sign bit into bit 0.
define <2 x i32> @test1vec(<2 x i32> %X) {
; CHECK-LABEL: @test1vec(
; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
; CHECK-NEXT: ret <2 x i32> [[X_LOBIT]]
;
%a = icmp slt <2 x i32> %X, zeroinitializer
%b = zext <2 x i1> %a to <2 x i32>
ret <2 x i32> %b
}
define i32 @test2(i32 %X) { define i32 @test2(i32 %X) {
; CHECK-LABEL: @test2( ; CHECK-LABEL: @test2(
; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 %X, 31 ; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 %X, 31
@ -24,6 +34,17 @@ define i32 @test2(i32 %X) {
ret i32 %b ret i32 %b
} }
; Splat-vector "sign bit clear" test: X <u 0x80000000 (written below as
; -2147483648) holds exactly when the sign bit of X is zero, so the zext is
; expected to fold to (X >>u 31) ^ 1.
define <2 x i32> @test2vec(<2 x i32> %X) {
; CHECK-LABEL: @test2vec(
; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor <2 x i32> [[X_LOBIT]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[X_LOBIT_NOT]]
;
%a = icmp ult <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
%b = zext <2 x i1> %a to <2 x i32>
ret <2 x i32> %b
}
define i32 @test3(i32 %X) { define i32 @test3(i32 %X) {
; CHECK-LABEL: @test3( ; CHECK-LABEL: @test3(
; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 %X, 31 ; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 %X, 31

View File

@ -282,6 +282,17 @@ define i32 @test20(i32 %A) {
ret i32 %D ret i32 %D
} }
; Splat-vector low-bit test: after masking with 1 only bit 0 can be set, so
; zext ((A & 1) != 0) is expected to fold to (A & 1) itself, with no compare.
define <2 x i32> @test20vec(<2 x i32> %A) {
; CHECK-LABEL: @test20vec(
; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[B]]
;
%B = and <2 x i32> %A, <i32 1, i32 1>
%C = icmp ne <2 x i32> %B, zeroinitializer
%D = zext <2 x i1> %C to <2 x i32>
ret <2 x i32> %D
}
define i32 @test21(i32 %a) { define i32 @test21(i32 %a) {
; CHECK-LABEL: @test21( ; CHECK-LABEL: @test21(
; CHECK-NEXT: [[TMP_6:%.*]] = lshr i32 %a, 2 ; CHECK-NEXT: [[TMP_6:%.*]] = lshr i32 %a, 2
@ -294,6 +305,18 @@ define i32 @test21(i32 %a) {
ret i32 %retval ret i32 %retval
} }
; Splat-vector single-bit test on bit 2: zext ((a & 4) != 0) is expected to
; fold to (a >>u 2) & 1, i.e. shift the tested bit down to bit 0 and mask it.
define <2 x i32> @test21vec(<2 x i32> %a) {
; CHECK-LABEL: @test21vec(
; CHECK-NEXT: [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[TMP_6_LOBIT]]
;
%tmp.6 = and <2 x i32> %a, <i32 4, i32 4>
%not.tmp.7 = icmp ne <2 x i32> %tmp.6, zeroinitializer
%retval = zext <2 x i1> %not.tmp.7 to <2 x i32>
ret <2 x i32> %retval
}
define i1 @test22(i32 %A, i32 %X) { define i1 @test22(i32 %A, i32 %X) {
; CHECK-LABEL: @test22( ; CHECK-LABEL: @test22(
; CHECK-NEXT: ret i1 true ; CHECK-NEXT: ret i1 true
@ -318,6 +341,18 @@ define i32 @test23(i32 %a) {
ret i32 %tmp.3 ret i32 %tmp.3
} }
; Splat-vector inverted low-bit test: zext ((a & 1) == 0) is expected to fold
; to (a & 1) ^ 1, i.e. the masked low bit with its value toggled.
define <2 x i32> @test23vec(<2 x i32> %a) {
; CHECK-LABEL: @test23vec(
; CHECK-NEXT: [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%tmp.1 = and <2 x i32> %a, <i32 1, i32 1>
%tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
%tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
ret <2 x i32> %tmp.3
}
define i32 @test24(i32 %a) { define i32 @test24(i32 %a) {
; CHECK-LABEL: @test24( ; CHECK-LABEL: @test24(
; CHECK-NEXT: [[TMP_1:%.*]] = lshr i32 %a, 2 ; CHECK-NEXT: [[TMP_1:%.*]] = lshr i32 %a, 2
@ -332,6 +367,20 @@ define i32 @test24(i32 %a) {
ret i32 %tmp.3 ret i32 %tmp.3
} }
; Splat-vector inverted single-bit test on bit 2: the input masks bit 2, shifts
; it down to bit 0, and compares against zero. The zext is expected to fold to
; ((a >>u 2) & 1) ^ 1 with no compare remaining.
define <2 x i32> @test24vec(<2 x i32> %a) {
; CHECK-LABEL: @test24vec(
; CHECK-NEXT: [[TMP_1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and <2 x i32> [[TMP_1]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[TMP_1_LOBIT]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%tmp1 = and <2 x i32> %a, <i32 4, i32 4>
%tmp.1 = lshr <2 x i32> %tmp1, <i32 2, i32 2>
%tmp.2 = icmp eq <2 x i32> %tmp.1, zeroinitializer
%tmp.3 = zext <2 x i1> %tmp.2 to <2 x i32>
ret <2 x i32> %tmp.3
}
define i1 @test25(i32 %A) { define i1 @test25(i32 %A) {
; CHECK-LABEL: @test25( ; CHECK-LABEL: @test25(
; CHECK-NEXT: ret i1 false ; CHECK-NEXT: ret i1 false