[DAG] add splat vector support for 'or' in SimplifyDemandedBits

I've changed one of the tests so that it no longer folds away completely, but note that we didn't
perform — and still don't perform — the transform that the comment claims we do (and I don't know
why we'd want to do that transform in the first place).

Follow-up to:
https://reviews.llvm.org/rL300725
https://reviews.llvm.org/rL300763

llvm-svn: 300772
This commit is contained in:
Sanjay Patel 2017-04-19 22:00:00 +00:00
parent 4a3a870da9
commit 0658a95a35
3 changed files with 16 additions and 21 deletions

View File

@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return Load;
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
SimplifyDemandedBits(SDValue(N, 0)))
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
return SDValue();

View File

@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %or
}
; TODO: Why would we do this?
; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
; CHECK-NEXT: retq
%1 = and <2 x i64> %a0, <i64 1, i64 1>
%1 = and <2 x i64> %a0, <i64 7, i64 7>
%2 = or <2 x i64> %1, <i64 3, i64 3>
ret <2 x i64> %2
}
; If all masked bits are going to be set, that's a constant fold.
define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
; CHECK-LABEL: or_and_v4i32:
; CHECK: # BB#0:
; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT: retq
%1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
%2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
; CHECK-LABEL: or_zext_v2i32:
; CHECK: # BB#0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295]
; CHECK-NEXT: retq
%1 = zext <2 x i32> %a0 to <2 x i64>
%2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
; CHECK-LABEL: or_zext_v4i16:
; CHECK: # BB#0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
; CHECK-NEXT: retq
%1 = zext <4 x i16> %a0 to <4 x i32>
%2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>

View File

@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
; X64-SSE-NEXT: pandn %xmm0, %xmm2
; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
; X64-SSE-NEXT: por %xmm2, %xmm3
; X64-SSE-NEXT: movdqa %xmm3, %xmm0
; X64-SSE-NEXT: pxor %xmm1, %xmm0
; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
; X64-SSE-NEXT: por %xmm2, %xmm1
; X64-SSE-NEXT: movdqa %xmm0, %xmm4
; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
; X64-SSE-NEXT: pxor %xmm3, %xmm1
; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
; X64-SSE-NEXT: movdqa %xmm1, %xmm2
; X64-SSE-NEXT: pcmpgtd %xmm0, %xmm2
; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; X64-SSE-NEXT: pand %xmm5, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
; X64-SSE-NEXT: pand %xmm4, %xmm0
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movdqa %xmm1, %xmm0
; X64-SSE-NEXT: pandn %xmm3, %xmm0
; X64-SSE-NEXT: pand %xmm2, %xmm1
; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
; X64-SSE-NEXT: por %xmm0, %xmm1
; X64-SSE-NEXT: movd %xmm1, %rax
; X64-SSE-NEXT: xorps %xmm0, %xmm0