diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9f50bfe7bfbd..66afc905ca00 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return Load;
 
   // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index e4cf296432ba..d7f52d247988 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %or
 }
 
+; TODO: Why would we do this?
 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
 
 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %1 = and <2 x i64> %a0, <i64 1, i64 1>
+  %1 = and <2 x i64> %a0, <i64 7, i64 7>
   %2 = or <2 x i64> %1, <i64 3, i64 3>
   ret <2 x i64> %2
 }
 
+; If all masked bits are going to be set, that's a constant fold.
+
 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 ; CHECK-LABEL: or_and_v4i32:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
 ; CHECK-NEXT:    retq
   %1 = and <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 ; CHECK-LABEL: or_zext_v2i32:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
 ; CHECK-NEXT:    retq
   %1 = zext <2 x i32> %a0 to <2 x i64>
   %2 = or <2 x i64> %1, <i64 4294967295, i64 4294967295>
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
 ; CHECK-LABEL: or_zext_v4i16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
 ; CHECK-NEXT:    retq
   %1 = zext <4 x i16> %a0 to <4 x i32>
   %2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll
index 9626d64847fe..3da1a360e290 100644
--- a/llvm/test/CodeGen/X86/i64-to-float.ll
+++ b/llvm/test/CodeGen/X86/i64-to-float.ll
@@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X64-SSE-NEXT:    pandn %xmm0, %xmm2
 ; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm3
 ; X64-SSE-NEXT:    por %xmm2, %xmm3
-; X64-SSE-NEXT:    movdqa %xmm3, %xmm0
-; X64-SSE-NEXT:    pxor %xmm1, %xmm0
-; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT:    por %xmm2, %xmm1
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X64-SSE-NEXT:    pcmpgtd %xmm1, %xmm4
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT:    pxor %xmm3, %xmm1
+; X64-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X64-SSE-NEXT:    pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT:    pand %xmm5, %xmm0
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; X64-SSE-NEXT:    pand %xmm4, %xmm0
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; X64-SSE-NEXT:    por %xmm0, %xmm1
 ; X64-SSE-NEXT:    movdqa %xmm1, %xmm0
 ; X64-SSE-NEXT:    pandn %xmm3, %xmm0
-; X64-SSE-NEXT:    pand %xmm2, %xmm1
+; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT:    por %xmm0, %xmm1
 ; X64-SSE-NEXT:    movd %xmm1, %rax
 ; X64-SSE-NEXT:    xorps %xmm0, %xmm0
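
Note on the fold itself: removing the !VT.isVector() guard lets SimplifyDemandedBits run on vector OR nodes, which is what turns the or_zext_* tests into a single constant materialization. The zero-extend proves the high bits are zero, and the OR constant sets every remaining bit, so no bit of the result depends on the input. Below is a minimal standalone C++ sketch of that reasoning for the <2 x i32> case; the helper name or_zext_i32 is illustrative only, and plain integers stand in for LLVM's APInt-based known-bits tracking.

#include <cassert>
#include <cstdint>

// (or (zext i32 %x to i64), 4294967295): the zext forces the high 32
// bits to zero and the OR constant forces the low 32 bits to one, so
// every result bit is known and the node folds to a constant.
uint64_t or_zext_i32(uint32_t x) {
  uint64_t z = x;           // zero-extend: high 32 bits known zero
  return z | 0xFFFFFFFFull; // low 32 bits forced to one
}

int main() {
  // Any input produces the same value, matching the new CHECK line
  // "movaps {{.*#+}} xmm0 = [4294967295,4294967295]".
  assert(or_zext_i32(0) == 0xFFFFFFFFull);
  assert(or_zext_i32(0xDEADBEEFu) == 0xFFFFFFFFull);
  return 0;
}

The or_and_v4i32 fold is the same idea: every bit the AND mask can keep is already set by the OR constant, so (x & 1) | 3 evaluates to 3 for any x. The i64-to-float.ll change likewise folds the old "por of [255,255] into the sign-bit constant" into the single pre-materialized comparison constant [2147483903,2147483903].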