forked from OSchip/llvm-project
[DAG] add splat vector support for 'and' in SimplifyDemandedBits
The patch itself is simple: stop discriminating against vectors in visitAnd() and again in SimplifyDemandedBits().

Some notes for reference:

1. We're not consistent about calls to SimplifyDemandedBits in the various visitXXX functions. Sometimes, we check if the RHS is a constant first. Other times (like here), we just dive in.
2. I'd like to break the vector shackles in steps for the sake of risk minimization, but we could make similar simultaneous changes in other places if we think that would be better.
3. I don't know what the intent of the changed tests in this patch was supposed to be, but since they wiggled in a positive way, I'm just going with that. :)
4. In the rotate tests, note that we can see through non-splat constants. This is a result of D24253.
5. My motivation for being here now is to make D31944 look better, so this is step 1 of N towards improving the vector codegen in that patch without writing any actual new code.

Differential Revision: https://reviews.llvm.org/D32230

llvm-svn: 300725
This commit is contained in:
parent
3fa4bb4024
commit
ded7d59f0e
|
@ -3705,7 +3705,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
|
|||
|
||||
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
|
||||
// fold (and (sra)) -> (and (srl)) when possible.
|
||||
if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
|
||||
if (SimplifyDemandedBits(SDValue(N, 0)))
|
||||
return SDValue(N, 0);
|
||||
|
||||
// fold (zext_inreg (extload x)) -> (zextload x)
|
||||
|
|
|
@ -574,7 +574,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
|
|||
// using the bits from the RHS. Below, we use knowledge about the RHS to
|
||||
// simplify the LHS, here we're using information from the LHS to simplify
|
||||
// the RHS.
|
||||
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
|
||||
if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
|
||||
SDValue Op0 = Op.getOperand(0);
|
||||
APInt LHSZero, LHSOne;
|
||||
// Do not increment Depth here; that can cause an infinite loop.
|
||||
|
|
|
@ -542,7 +542,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re
|
|||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; KNL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
|
||||
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
|
@ -923,7 +923,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind
|
|||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; KNL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
|
||||
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
|
@ -1110,7 +1110,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind
|
|||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpslld $31, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpsrad $31, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxdq %xmm0, %ymm0
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
|
||||
; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
|
@ -1173,7 +1173,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind
|
|||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
|
||||
; KNL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
|
||||
; KNL-NEXT: vpand %ymm0, %ymm1, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
|
|
|
@ -1534,31 +1534,20 @@ define <16 x i8> @splatconstant_rotate_v16i8(<16 x i8> %a) nounwind {
|
|||
define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
|
||||
; SSE-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movdqa %xmm0, %xmm1
|
||||
; SSE-NEXT: psllq $15, %xmm1
|
||||
; SSE-NEXT: psrlq $49, %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
|
||||
; SSE-NEXT: pand {{.*}}(%rip), %xmm1
|
||||
; SSE-NEXT: por %xmm0, %xmm1
|
||||
; SSE-NEXT: movdqa %xmm1, %xmm0
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vpsllq $15, %xmm0, %xmm1
|
||||
; AVX-NEXT: vpsrlq $49, %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllq $15, %xmm0, %xmm1
|
||||
; AVX512-NEXT: vpsrlq $49, %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
|
||||
; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: splatconstant_rotate_mask_v2i64:
|
||||
|
|
|
@ -1014,34 +1014,23 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
|
|||
define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
|
||||
; AVX1-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vpsllq $15, %xmm0, %xmm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
|
||||
; AVX1-NEXT: vpsllq $15, %xmm2, %xmm3
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
|
||||
; AVX1-NEXT: vpsrlq $49, %xmm2, %xmm2
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vpsrlq $49, %xmm1, %xmm1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vpsllq $15, %ymm0, %ymm1
|
||||
; AVX2-NEXT: vpsrlq $49, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpsllq $15, %ymm0, %ymm1
|
||||
; AVX512-NEXT: vpsrlq $49, %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
|
||||
; AVX512-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
|
||||
; AVX512-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; XOPAVX1-LABEL: splatconstant_rotate_mask_v4i64:
|
||||
|
|
Loading…
Reference in New Issue