[SelectionDAG] enhance vector demanded elements to look at a vector select condition operand

This is the DAG equivalent of D51433.
If we know we're not using all vector lanes, use that knowledge to potentially simplify a vselect condition.
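As a minimal illustration, here is a standalone C++ sketch of the idea (plain C++, not the SelectionDAG API; the 4-lane setup, names, and values are invented for this example): if the user of a select only reads some lanes, those same lanes are all that matter in the condition vector, so whatever computes the remaining condition lanes is dead.

  #include <array>
  #include <cstdint>
  #include <cstdio>

  constexpr int NumLanes = 4;
  using Vec = std::array<int64_t, NumLanes>;
  using Mask = std::array<bool, NumLanes>;

  // Lane-wise select: R[i] = Cond[i] ? LHS[i] : RHS[i].
  static Vec vselect(const Mask &Cond, const Vec &LHS, const Vec &RHS) {
    Vec R{};
    for (int I = 0; I != NumLanes; ++I)
      R[I] = Cond[I] ? LHS[I] : RHS[I];
    return R;
  }

  int main() {
    Vec A{10, 2, 30, 4}, B{1, 20, 3, 40};
    uint32_t DemandedElts = 0b0001; // the user only reads lane 0 of the select

    // Full condition: a per-lane compare across all four lanes.
    Mask Cond{};
    for (int I = 0; I != NumLanes; ++I)
      Cond[I] = A[I] > B[I];

    // Simplified condition: lanes outside DemandedElts may hold anything,
    // so the instructions that would produce them can be dropped.
    Mask SimpleCond{};
    for (int I = 0; I != NumLanes; ++I)
      if (DemandedElts & (1u << I))
        SimpleCond[I] = A[I] > B[I];

    Vec Full = vselect(Cond, A, B);
    Vec Simple = vselect(SimpleCond, A, B);
    // Only the demanded lane has to agree; both print 10 here.
    std::printf("lane 0: full=%lld simple=%lld\n", (long long)Full[0],
                (long long)Simple[0]);
    return 0;
  }

Only the demanded lanes of the two results have to match, and that freedom is what the recursion into the condition operand exploits.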

The reduction/horizontal tests show that we are eliminating AVX1 operations on the upper half of 256-bit 
vectors because we don't need those anyway.
I'm not sure what the pr34592 test is showing. That's run with -O0; is SimplifyDemandedVectorElts supposed 
to be running there?
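To make the reduction case concrete, here is a rough model in plain C++ (not the .ll tests themselves; the function name and values are made up) of the tail of the v4i64 smax reduction (the min and unsigned variants are analogous): the final vmovq/vmovd reads only lane 0 of the last vblendvpd, so only lane 0 of its condition is demanded, and the vextractf128/vpcmpgtq/vinsertf128 sequence that built the upper 128 bits of that condition is what gets deleted.

  #include <array>
  #include <cstdint>
  #include <cstdio>

  using V4 = std::array<int64_t, 4>;

  // Models the last step of the AVX1 smax reduction: swap the two elements
  // inside each 128-bit half (vpermilps), compare, blend, then read lane 0.
  static int64_t reduce_smax_tail(const V4 &X) {
    V4 Shuf{X[1], X[0], X[3], X[2]};
    V4 Blend{};
    for (int I = 0; I != 4; ++I)
      Blend[I] = X[I] > Shuf[I] ? X[I] : Shuf[I];
    // Only lane 0 is extracted (vmovq %xmm0, %rax), so lanes 2 and 3 of Blend,
    // and of the compare that forms the blend condition, are never used.
    return Blend[0];
  }

  int main() {
    // By this point the real reduction has already folded the upper 128 bits
    // into the lower half, so lanes 0 and 1 hold the remaining candidates.
    std::printf("%lld\n", (long long)reduce_smax_tail({7, -3, 42, 5})); // prints 7
    return 0;
  }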

Differential Revision: https://reviews.llvm.org/D51696

llvm-svn: 341762
Sanjay Patel 2018-09-09 14:13:22 +00:00
parent fd1dc75b40
commit 6ebf218e4c
10 changed files with 47 additions and 138 deletions


@@ -1532,12 +1532,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
     break;
   }
   case ISD::VSELECT: {
-    APInt DemandedLHS(DemandedElts);
-    APInt DemandedRHS(DemandedElts);
-
-    // TODO - add support for constant vselect masks.
-
+    // Try to transform the select condition based on the current demanded
+    // elements.
+    // TODO: If a condition element is undef, we can choose from one arm of the
+    // select (and if one arm is undef, then we can propagate that to the
+    // result).
+    // TODO - add support for constant vselect masks (see IR version of this).
+    APInt UnusedUndef, UnusedZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+                                   UnusedZero, TLO, Depth + 1))
+      return true;
+
     // See if we can simplify either vselect operand.
+    APInt DemandedLHS(DemandedElts);
+    APInt DemandedRHS(DemandedElts);
     APInt UndefLHS, ZeroLHS;
     APInt UndefRHS, ZeroRHS;
     if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,


@@ -469,9 +469,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -548,9 +545,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1159,9 +1153,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1283,9 +1274,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper


@@ -472,9 +472,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -552,9 +549,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1163,9 +1157,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1287,9 +1278,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper


@@ -535,12 +535,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -631,12 +627,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1270,12 +1262,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1422,12 +1410,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper


@@ -473,12 +473,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -571,12 +567,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1172,12 +1164,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1326,12 +1314,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper


@@ -19,31 +19,30 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1],ymm8[2,3,4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm11 = ymm6[0,1,2,3],ymm11[4,5],ymm6[6,7]
; CHECK-NEXT: # kill: def $xmm9 killed $xmm9 killed $ymm9
; CHECK-NEXT: vmovdqa %xmm9, %xmm11
; CHECK-NEXT: # kill: def $ymm11 killed $xmm11
; CHECK-NEXT: vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovdqa %xmm9, %xmm0
; CHECK-NEXT: # kill: def $ymm0 killed $xmm0
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # implicit-def: $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
; CHECK-NEXT: vmovaps %xmm2, %xmm9
; CHECK-NEXT: # implicit-def: $ymm2
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm2, %ymm2
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm6[0],ymm7[2],ymm6[2]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3],ymm7[4,5],ymm6[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vmovaps %xmm7, %xmm9
; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
; CHECK-NEXT: # implicit-def: $ymm6
; CHECK-NEXT: vmovaps %xmm9, %xmm6
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
@@ -56,9 +55,9 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm6, %ymm3
; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp


@@ -158,9 +158,6 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -343,9 +340,6 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -645,9 +639,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper


@@ -157,9 +157,6 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -342,9 +339,6 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -644,9 +638,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper


@@ -164,12 +164,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -364,12 +360,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -693,10 +685,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper


@@ -163,12 +163,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -363,12 +359,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -692,10 +684,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper