forked from OSchip/llvm-project
[X86][SSE] Move 2-input limit up from getFauxShuffleMask to resolveTargetShuffleInputs (reapplied)
Reapplying an updated version of rL345395 (reverted in rL345451), now that the issues noticed in PR39483 have been fixed. This patch allows resolveTargetShuffleInputs to remove UNDEF inputs in cases where we have more than 2 inputs. llvm-svn: 345824
This commit is contained in:
parent
fa28f335b8
commit
1f0a8421ad
|
@ -6325,9 +6325,6 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
|
|||
if (!resolveTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG) ||
|
||||
!resolveTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG))
|
||||
return false;
|
||||
// TODO - Add support for more than 2 inputs.
|
||||
if ((SrcInputs0.size() + SrcInputs1.size()) > 2)
|
||||
return false;
|
||||
int MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
|
||||
SmallVector<int, 64> Mask0, Mask1;
|
||||
scaleShuffleMask<int>(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
|
||||
|
@ -6386,8 +6383,7 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
|
|||
}
|
||||
Mask[i + InsertIdx] = M;
|
||||
}
|
||||
// TODO - Add support for more than 1 subinput.
|
||||
return Ops.size() <= 2;
|
||||
return true;
|
||||
}
|
||||
case ISD::SCALAR_TO_VECTOR: {
|
||||
// Match against a scalar_to_vector of an extract from a vector,
|
||||
|
@ -6580,7 +6576,8 @@ static bool resolveTargetShuffleInputs(SDValue Op,
|
|||
return false;
|
||||
|
||||
resolveTargetShuffleInputsAndMask(Inputs, Mask);
|
||||
return true;
|
||||
// TODO - Add support for more than 2 inputs.
|
||||
return Inputs.size() <= 2;
|
||||
}
|
||||
|
||||
/// Returns the scalar element that will make up the ith
|
||||
|
|
|
@ -4019,11 +4019,11 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %v
|
|||
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
|
||||
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm3[1]
|
||||
; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
|
||||
; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
|
||||
; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,0,1,1]
|
||||
; CHECK-NEXT: vmovapd %ymm1, %ymm0
|
||||
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [1,1,5,5]
|
||||
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
|
||||
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
|
||||
; CHECK-NEXT: vblendmpd %ymm4, %ymm1, %ymm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
|
||||
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
|
||||
|
@ -4034,11 +4034,12 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask4(<8 x double> %v
|
|||
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask4(<8 x double> %vec, <4 x double> %mask) {
|
||||
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask4:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
|
||||
; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
|
||||
; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
|
||||
; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,1,1]
|
||||
; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
|
||||
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [1,1,5,5]
|
||||
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
|
||||
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
|
||||
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}
|
||||
; CHECK-NEXT: vmovapd %ymm2, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%shuf = shufflevector <8 x double> %vec, <8 x double> undef, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
|
||||
%cmp = fcmp oeq <4 x double> %mask, zeroinitializer
|
||||
|
|
Loading…
Reference in New Issue