[SelectionDAG] enhance vector demanded elements to look at a vector select condition operand
This is the DAG equivalent of D51433. If we know we're not using all vector lanes, use that knowledge to potentially simplify a vselect condition.

The reduction/horizontal tests show that we are eliminating AVX1 operations on the upper half of 256-bit vectors because we don't need those anyway.

I'm not sure what the pr34592 test is showing. That's run with -O0; is SimplifyDemandedVectorElts supposed to be running there?

Differential Revision: https://reviews.llvm.org/D51696

llvm-svn: 341762
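For context, here is a minimal IR sketch (a hypothetical function, not one of the tests touched by this patch) of the kind of pattern the reduction tests exercise: only lane 0 of the vselect result is demanded, so SimplifyDemandedVectorElts is free to simplify what the condition computes for the upper lanes, which on AVX1 lets the upper-128-bit compare be dropped.

; Hypothetical example: a signed-max reduction step where only element 0
; of the selected vector is extracted, so only lane 0 is demanded.
define i64 @smax_low_lane(<4 x i64> %a, <4 x i64> %b) {
  %cmp = icmp sgt <4 x i64> %a, %b                 ; condition feeding the vselect
  %sel = select <4 x i1> %cmp, <4 x i64> %a, <4 x i64> %b
  %elt = extractelement <4 x i64> %sel, i32 0      ; only lane 0 is demanded
  ret i64 %elt
}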
This commit is contained in:
parent fd1dc75b40
commit 6ebf218e4c

@@ -1532,12 +1532,20 @@ bool TargetLowering::SimplifyDemandedVectorElts(
    break;
  }
  case ISD::VSELECT: {
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);

    // TODO - add support for constant vselect masks.
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    // select (and if one arm is undef, then we can propagate that to the
    // result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,

@@ -469,9 +469,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -548,9 +545,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1159,9 +1153,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1283,9 +1274,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper

@@ -472,9 +472,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -552,9 +549,6 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1163,9 +1157,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1287,9 +1278,6 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper

@@ -535,12 +535,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -631,12 +627,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1270,12 +1262,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1422,12 +1410,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper

@@ -473,12 +473,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -571,12 +567,8 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1172,12 +1164,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1326,12 +1314,8 @@ define i64 @test_reduce_v8i64(<8 x i64> %a0) {
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper

@@ -19,31 +19,30 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1],ymm8[2,3,4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm11 = ymm6[0,1,2,3],ymm11[4,5],ymm6[6,7]
; CHECK-NEXT: # kill: def $xmm9 killed $xmm9 killed $ymm9
; CHECK-NEXT: vmovdqa %xmm9, %xmm11
; CHECK-NEXT: # kill: def $ymm11 killed $xmm11
; CHECK-NEXT: vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovdqa %xmm9, %xmm0
; CHECK-NEXT: # kill: def $ymm0 killed $xmm0
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # implicit-def: $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
; CHECK-NEXT: vmovaps %xmm2, %xmm9
; CHECK-NEXT: # implicit-def: $ymm2
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm2, %ymm2
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm6[0],ymm7[2],ymm6[2]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3],ymm7[4,5],ymm6[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vmovaps %xmm7, %xmm9
; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
; CHECK-NEXT: # implicit-def: $ymm6
; CHECK-NEXT: vmovaps %xmm9, %xmm6
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
@@ -56,9 +55,9 @@ define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <1
; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm6, %ymm3
; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp

@@ -158,9 +158,6 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -343,9 +340,6 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -645,9 +639,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper

@@ -157,9 +157,6 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -342,9 +339,6 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -644,9 +638,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper

@@ -164,12 +164,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -364,12 +360,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -693,10 +685,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper

@@ -163,12 +163,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -363,12 +359,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -692,10 +684,6 @@ define i64 @test_v16i64(<16 x i64> %a0) {
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper