# Patch summary: appends four test functions to known-signbits-vector.ll:
# signbits_mask_ashr_smax / _smin / _umax / _umin. Each one arithmetic-shifts
# both <4 x i32> inputs right by <25,26,27,0>, combines them with the matching
# SSE4.1 intrinsic (pmaxsd / pminsd / pmaxud / pminud), splats lane 0 of the
# result, shifts right again by <1,2,3,4> and finally masks with a constant.
# The assertion comments record the generated code for three check prefixes.
#
# NOTE(review): the constant vector operands of the ashr/and instructions were
# missing from this copy of the patch, which left the IR unparsable. The shift
# amounts below are reconstructed from the assertion lines (vpsrad immediates
# and the vmovdqa constant). The 'and' mask <-32768,-65536,-32768,-65536> is
# NOT derivable from this patch (it is a constant-pool load in the assertions);
# it is restored from the upstream LLVM test as recalled - confirm before use.
diff --git a/llvm/test/CodeGen/X86/known-signbits-vector.ll b/llvm/test/CodeGen/X86/known-signbits-vector.ll
index 1d2afebe2676..d5bc1b22d68d 100644
--- a/llvm/test/CodeGen/X86/known-signbits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-signbits-vector.ll
@@ -488,6 +488,298 @@ define <4 x float> @signbits_ashr_sext_select_shuffle_sitofp(<4 x i64> %a0, <4 x
   ret <4 x float> %6
 }
 
+define <4 x i32> @signbits_mask_ashr_smax(<4 x i32> %a0, <4 x i32> %a1) {
+; X86-LABEL: signbits_mask_ashr_smax:
+; X86:       # %bb.0:
+; X86-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-AVX1-LABEL: signbits_mask_ashr_smax:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: signbits_mask_ashr_smax:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
+  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
+  %3 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
+  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
+  ret <4 x i32> %6
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @signbits_mask_ashr_smin(<4 x i32> %a0, <4 x i32> %a1) {
+; X86-LABEL: signbits_mask_ashr_smin:
+; X86:       # %bb.0:
+; X86-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-AVX1-LABEL: signbits_mask_ashr_smin:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: signbits_mask_ashr_smin:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpminsd %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
+  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
+  %3 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
+  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
+  ret <4 x i32> %6
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @signbits_mask_ashr_umax(<4 x i32> %a0, <4 x i32> %a1) {
+; X86-LABEL: signbits_mask_ashr_umax:
+; X86:       # %bb.0:
+; X86-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-AVX1-LABEL: signbits_mask_ashr_umax:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: signbits_mask_ashr_umax:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
+  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
+  %3 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
+  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
+  ret <4 x i32> %6
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @signbits_mask_ashr_umin(<4 x i32> %a0, <4 x i32> %a1) {
+; X86-LABEL: signbits_mask_ashr_umin:
+; X86:       # %bb.0:
+; X86-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X86-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X86-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X86-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X86-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-AVX1-LABEL: signbits_mask_ashr_umin:
+; X64-AVX1:       # %bb.0:
+; X64-AVX1-NEXT:    vpsrad $26, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm0, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpsrad $26, %xmm1, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $27, %xmm1, %xmm3
+; X64-AVX1-NEXT:    vpsrad $25, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X64-AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X64-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpsrad $2, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX1-NEXT:    vpsrad $3, %xmm0, %xmm2
+; X64-AVX1-NEXT:    vpsrad $1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; X64-AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: signbits_mask_ashr_umin:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [25,26,27,0]
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd %xmm2, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpminud %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+  %1 = ashr <4 x i32> %a0, <i32 25, i32 26, i32 27, i32 0>
+  %2 = ashr <4 x i32> %a1, <i32 25, i32 26, i32 27, i32 0>
+  %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> %2)
+  %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer
+  %5 = ashr <4 x i32> %4, <i32 1, i32 2, i32 3, i32 4>
+  %6 = and <4 x i32> %5, <i32 -32768, i32 -65536, i32 -32768, i32 -65536>
+  ret <4 x i32> %6
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
 ; Make sure we can preserve sign bit information into the second basic block
 ; so we can avoid having to shift bit 0 into bit 7 for each element due to
 ; v32i1->v32i8 promotion and the splitting of v32i8 into 2xv16i8. This requires