forked from OSchip/llvm-project
[X86][AVX512] Only combine EVEX target shuffles to shuffles with the same number of vector elements
Over-eager combining prevents the correct folding of writemasks. At the moment this occurs for ALL EVEX shuffles; in the future we need to check that the user of the root shuffle is a VSELECT that can fold to a writemask. llvm-svn: 279934
This commit is contained in:
parent
5728200f33
commit
5369cd9e9c
|
@ -25287,6 +25287,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
}
|
||||
|
||||
unsigned RootSizeInBits = RootVT.getSizeInBits();
|
||||
unsigned NumRootElts = RootVT.getVectorNumElements();
|
||||
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
|
||||
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
|
||||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
|
||||
|
@ -25297,11 +25298,10 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
// TODO - this currently prevents all lane shuffles from occurring.
|
||||
// TODO - check for writemasks usage instead of always preventing combining.
|
||||
// TODO - attempt to narrow Mask back to writemask size.
|
||||
if (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits &&
|
||||
(RootSizeInBits == 512 ||
|
||||
(Subtarget.hasVLX() && RootSizeInBits >= 128))) {
|
||||
bool IsEVEXShuffle =
|
||||
RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
|
||||
if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
|
||||
|
||||
|
@ -25370,6 +25370,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleVT)) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return false; // Nothing to do!
|
||||
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
|
||||
return false; // AVX512 Writemask clash.
|
||||
Res = DAG.getBitcast(ShuffleVT, V1);
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
|
||||
|
@ -25383,6 +25385,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
ShuffleVT, PermuteImm)) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return false; // Nothing to do!
|
||||
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
|
||||
return false; // AVX512 Writemask clash.
|
||||
Res = DAG.getBitcast(ShuffleVT, V1);
|
||||
DCI.AddToWorklist(Res.getNode());
|
||||
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
|
||||
|
@ -25398,6 +25402,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
ShuffleVT)) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return false; // Nothing to do!
|
||||
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
|
||||
return false; // AVX512 Writemask clash.
|
||||
V1 = DAG.getBitcast(ShuffleVT, V1);
|
||||
DCI.AddToWorklist(V1.getNode());
|
||||
V2 = DAG.getBitcast(ShuffleVT, V2);
|
||||
|
@ -25413,6 +25419,8 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
|
|||
Shuffle, ShuffleVT, PermuteImm)) {
|
||||
if (Depth == 1 && Root.getOpcode() == Shuffle)
|
||||
return false; // Nothing to do!
|
||||
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
|
||||
return false; // AVX512 Writemask clash.
|
||||
V1 = DAG.getBitcast(ShuffleVT, V1);
|
||||
DCI.AddToWorklist(V1.getNode());
|
||||
V2 = DAG.getBitcast(ShuffleVT, V2);
|
||||
|
|
|
@ -3241,7 +3241,8 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
|
|||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
|
||||
; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; AVX512VL-NEXT: addq $24, %rsp
|
||||
; AVX512VL-NEXT: popq %rbx
|
||||
; AVX512VL-NEXT: popq %r14
|
||||
|
@ -4148,7 +4149,8 @@ define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwi
|
|||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
|
||||
; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
|
||||
; AVX512VL-NEXT: addq $16, %rsp
|
||||
; AVX512VL-NEXT: popq %rbx
|
||||
|
@ -5136,7 +5138,8 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
|
|||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
|
||||
; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; AVX512VL-NEXT: addq $40, %rsp
|
||||
; AVX512VL-NEXT: popq %rbx
|
||||
; AVX512VL-NEXT: popq %r14
|
||||
|
@ -5939,7 +5942,8 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
|
|||
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
|
||||
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
|
||||
; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
|
||||
; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
|
||||
; AVX512VL-NEXT: addq $32, %rsp
|
||||
; AVX512VL-NEXT: popq %rbx
|
||||
|
|
Loading…
Reference in New Issue