[TargetLowering] Add ISD::AND handling to SimplifyDemandedVectorElts

If either operand's element is zero, then we know the corresponding result element is zero (even if the other operand's element is undef).
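
A minimal standalone sketch of that per-lane rule, modelling the known-zero/known-undef lane sets with LLVM's APInt (the helper name and signature are illustrative only, not part of the patch):

#include "llvm/ADT/APInt.h"
using llvm::APInt;

// For V = and(A, B), with one bit per vector lane: a lane of V is known
// zero if it is zero in either operand (zero wins even against undef),
// and known undef only if it is undef in both operands and not already
// known zero.
static void combineAndLanes(const APInt &ZeroA, const APInt &UndefA,
                            const APInt &ZeroB, const APInt &UndefB,
                            APInt &ZeroV, APInt &UndefV) {
  ZeroV = ZeroA | ZeroB;
  UndefV = (UndefA & UndefB) & ~ZeroV;
}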

Differential Revision: https://reviews.llvm.org/D55558

llvm-svn: 348926
Simon Pilgrim 2018-12-12 13:43:07 +00:00
parent 125d9b0907
commit f6c898e12f
7 changed files with 57 additions and 46 deletions

@@ -1787,6 +1787,22 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     KnownUndef &= SrcUndef;
     break;
   }
+  case ISD::AND: {
+    APInt SrcUndef, SrcZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+                                   SrcZero, TLO, Depth + 1))
+      return true;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+                                   KnownZero, TLO, Depth + 1))
+      return true;
+
+    // If either side has a zero element, then the result element is zero, even
+    // if the other is an UNDEF.
+    KnownZero |= SrcZero;
+    KnownUndef &= SrcUndef;
+    KnownUndef &= ~KnownZero;
+    break;
+  }
   case ISD::TRUNCATE:
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
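
For concreteness, a hypothetical 4-lane call to the combineAndLanes sketch above, mirroring the merge at the end of the new ISD::AND case (the lane values are invented for illustration):

// Bit i set == property holds for lane i.
APInt ZeroA(4, 0x1), UndefA(4, 0x2); // A: lane 0 zero, lane 1 undef
APInt ZeroB(4, 0x2), UndefB(4, 0x8); // B: lane 1 zero, lane 3 undef
APInt ZeroV(4, 0), UndefV(4, 0);
combineAndLanes(ZeroA, UndefA, ZeroB, UndefB, ZeroV, UndefV);
// ZeroV == 0x3: lane 1 is known zero even though it is undef in A.
// UndefV == 0x0: no lane is undef in both operands.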

@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 ;
 ; Check that a widening truncate to a vector of i1 elements can be handled.
@@ -5,19 +6,14 @@
 define void @pr32275(<4 x i8> %B15) {
 ; CHECK-LABEL: pr32275:
-; CHECK: vlgvb %r0, %v24, 3
-; CHECK-NEXT: vlgvb %r1, %v24, 1
-; CHECK-NEXT: vlvgp [[REG1:%v[0-9]]], %r1, %r0
-; CHECK-NEXT: vlgvb %r0, %v24, 0
-; CHECK-NEXT: vlgvb [[REG3:%r[0-9]]], %v24, 2
-; CHECK-NEXT: vrepif [[REG0:%v[0-9]]], 1
-; CHECK: .LBB0_1:
-; CHECK-DAG: vlr [[REG2:%v[0-9]]], [[REG1]]
-; CHECK-DAG: vlvgf [[REG2]], %r0, 0
-; CHECK-NEXT: vlvgf [[REG2]], [[REG3]], 2
-; CHECK-NEXT: vn [[REG2]], [[REG2]], [[REG0]]
-; CHECK-NEXT: vlgvf [[REG4:%r[0-9]]], [[REG2]], 3
-; CHECK-NEXT: cijlh [[REG4]], 0, .LBB0_1
+; CHECK: # %bb.0: # %BB
+; CHECK-NEXT: vlgvb %r0, %v24, 3
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vrepif %v1, 1
+; CHECK-NEXT: vn %v0, %v0, %v1
+; CHECK-NEXT: vlgvf %r0, %v0, 3
+; CHECK-NEXT: .LBB0_1: # %CF34
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cijlh %r0, 0, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %CF36
+; CHECK-NEXT: br %r14
 BB:

@@ -26,7 +26,6 @@ define double @mag_neg0_double(double %x) nounwind {
 ; CHECK-LABEL: mag_neg0_double:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: movsd [[SIGNMASK2]](%rip), %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
 ; CHECK-NEXT: andps %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;
@@ -92,7 +91,6 @@ define float @mag_neg0_float(float %x) nounwind {
 ; CHECK-LABEL: mag_neg0_float:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: movss [[SIGNMASK6]](%rip), %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; CHECK-NEXT: andps %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;

@@ -5,13 +5,17 @@
 define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_extract_sext:
 ; X32: # %bb.0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: movl $15, %eax
+; X32-NEXT: vmovd %eax, %xmm1
+; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT: vpextrw $0, %xmm0, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_extract_sext:
 ; X64: # %bb.0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: movl $15, %eax
+; X64-NEXT: vmovd %eax, %xmm1
+; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vpextrw $0, %xmm0, %eax
 ; X64-NEXT: retq
 %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>

@@ -253,16 +253,16 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
 ; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X32: # %bb.0:
 ; X32-NEXT: pushl %eax
-; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
+; X32-NEXT: vpsrlq $60, %xmm0, %xmm1
 ; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
-; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $0, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
+; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X32-NEXT: vmovss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)
 ; X32-NEXT: popl %eax
@@ -270,17 +270,17 @@ define float @signbits_ashr_sext_sextinreg_and_extract_sitofp(<2 x i64> %a0, <2
 ;
 ; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X64: # %bb.0:
-; X64-NEXT: vpsrlq $60, %xmm0, %xmm2
+; X64-NEXT: vpsrlq $60, %xmm0, %xmm1
 ; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8]
-; X64-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; X64-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
+; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
 ; X64-NEXT: movslq %edi, %rax
-; X64-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
+; X64-NEXT: vmovq %rax, %xmm1
 ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
+; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X64-NEXT: retq
 %1 = ashr <2 x i64> %a0, <i64 61, i64 60>
 %2 = sext i32 %a2 to i64

@@ -861,15 +861,13 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; X32-SSE: # %bb.0:
 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
 ; X32-SSE-NEXT: pand %xmm2, %xmm0
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; X32-SSE-NEXT: pand %xmm2, %xmm3
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrlq %xmm2, %xmm3
 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psrlq %xmm2, %xmm1
-; X32-SSE-NEXT: psrlq %xmm3, %xmm0
-; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: psrlq %xmm2, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
 ; X32-SSE-NEXT: retl
 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
 %shift = lshr <2 x i32> %a, %splat

@@ -639,15 +639,14 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ;
 ; X32-SSE-LABEL: splatvar_shift_v2i32:
 ; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT: xorps %xmm3, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psllq %xmm3, %xmm1
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psllq %xmm2, %xmm3
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
 ; X32-SSE-NEXT: psllq %xmm2, %xmm0
-; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
 ; X32-SSE-NEXT: retl
 %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
 %shift = shl <2 x i32> %a, %splat