[X86][SSE] Move 2-input limit up from getFauxShuffleMask to resolveTargetShuffleInputs (reapplied)

Reapplying an updated version of rL345395 (reverted in rL345451), now the issues noticed in PR39483 have been fixed. 

This patch allows resolveTargetShuffleInputs to remove UNDEF inputs from cases where we have more than 2 inputs.

llvm-svn: 345824
This commit is contained in:
Simon Pilgrim 2018-11-01 11:52:09 +00:00
parent fa28f335b8
commit 1f0a8421ad
2 changed files with 14 additions and 16 deletions

View File

@ -6325,9 +6325,6 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
!resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
return false;
// TODO - Add support for more than 2 inputs.
if ((SrcInputs0.size() + SrcInputs1.size()) > 2)
return false;
int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
SmallVector<int, 64> Mask0, Mask1;
scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
@ -6386,8 +6383,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
}
Mask[i + InsertIdx] = M;
}
// TODO - Add support for more than 1 subinput.
return Ops.size() <= 2;
return true;
}
case ISD::SCALAR_TO_VECTOR: {
// Match against a scalar_to_vector of an extract from a vector,
@ -6580,7 +6576,8 @@ static bool resolveTargetShuffleInputs(SDValue Op,
return false;
resolveTargetShuffleInputsAndMask(Inputs, Mask);
return true;
// TODO - Add support for more than 2 inputs.
return Inputs.size() <= 2;
}
/// Returns the scalar element that will make up the ith

View File

@ -4019,11 +4019,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %v
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm3[1]
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,0,1,1]
; CHECK-NEXT: vmovapd %ymm1, %ymm0
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,5,5]
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
@ -4034,11 +4034,12 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %v
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %mask) {
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask4:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,1,1]
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [1,1,5,5]
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer