forked from OSchip/llvm-project
[AVX-512] Fix DecodeVPERMV3Mask to handle cases where the constant pool entry has a different type than the shuffle itself.
Summary: This is especially important for 32-bit targets with 64-bit shuffle elements. This is similar to how PSHUFB and VPERMIL handle the same problem. Reviewers: RKSimon. Subscribers: llvm-commits. Differential Revision: https://reviews.llvm.org/D25666 llvm-svn: 284451
This commit is contained in:
parent
175a415e78
commit
7268bf99ab
|
@ -5102,8 +5102,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||
Ops.push_back(N->getOperand(0));
|
||||
Ops.push_back(N->getOperand(2));
|
||||
SDValue MaskNode = N->getOperand(1);
|
||||
unsigned MaskEltSize = VT.getScalarSizeInBits();
|
||||
if (auto *C = getTargetConstantFromNode(MaskNode)) {
|
||||
DecodeVPERMV3Mask(C, VT, Mask);
|
||||
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
|
@ -5114,8 +5115,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||
Ops.push_back(N->getOperand(1));
|
||||
Ops.push_back(N->getOperand(2));
|
||||
SDValue MaskNode = N->getOperand(0);
|
||||
unsigned MaskEltSize = VT.getScalarSizeInBits();
|
||||
if (auto *C = getTargetConstantFromNode(MaskNode)) {
|
||||
DecodeVPERMV3Mask(C, VT, Mask);
|
||||
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
|
|
|
@ -309,26 +309,31 @@ void DecodeVPERMVMask(const Constant *C, MVT VT,
|
|||
ShuffleMask.push_back(Element);
|
||||
}
|
||||
|
||||
void DecodeVPERMV3Mask(const Constant *C, MVT VT,
|
||||
void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
|
||||
SmallVectorImpl<int> &ShuffleMask) {
|
||||
Type *MaskTy = C->getType();
|
||||
unsigned NumElements = MaskTy->getVectorNumElements();
|
||||
if (NumElements == VT.getVectorNumElements()) {
|
||||
unsigned EltMaskSize = Log2_64(NumElements * 2);
|
||||
for (unsigned i = 0; i < NumElements; ++i) {
|
||||
Constant *COp = C->getAggregateElement(i);
|
||||
if (!COp) {
|
||||
ShuffleMask.clear();
|
||||
return;
|
||||
}
|
||||
if (isa<UndefValue>(COp))
|
||||
ShuffleMask.push_back(SM_SentinelUndef);
|
||||
else {
|
||||
APInt Element = cast<ConstantInt>(COp)->getValue();
|
||||
Element = Element.getLoBits(EltMaskSize);
|
||||
ShuffleMask.push_back(Element.getZExtValue());
|
||||
}
|
||||
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
|
||||
(void)MaskTySize;
|
||||
assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
|
||||
"Unexpected vector size.");
|
||||
assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
|
||||
"Unexpected vector element size.");
|
||||
|
||||
// The shuffle mask requires elements the same size as the target.
|
||||
SmallBitVector UndefElts;
|
||||
SmallVector<uint64_t, 8> RawMask;
|
||||
if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
|
||||
return;
|
||||
|
||||
unsigned NumElts = RawMask.size();
|
||||
|
||||
for (unsigned i = 0; i != NumElts; ++i) {
|
||||
if (UndefElts[i]) {
|
||||
ShuffleMask.push_back(SM_SentinelUndef);
|
||||
continue;
|
||||
}
|
||||
int Index = RawMask[i] & (NumElts*2 - 1);
|
||||
ShuffleMask.push_back(Index);
|
||||
}
|
||||
}
|
||||
} // llvm namespace
|
||||
|
|
|
@ -44,7 +44,7 @@ void DecodeVPERMVMask(const Constant *C, MVT VT,
|
|||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
|
||||
void DecodeVPERMV3Mask(const Constant *C, MVT VT,
|
||||
void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
|
||||
SmallVectorImpl<int> &ShuffleMask);
|
||||
|
||||
} // llvm namespace
|
||||
|
|
|
@ -112,10 +112,6 @@ define <8 x i64> @combine_permvar_8i64_identity_mask(<8 x i64> %x0, <8 x i64> %x
|
|||
define <8 x double> @combine_vpermt2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
|
||||
; X32-LABEL: combine_vpermt2var_8f64_identity:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
||||
; X32-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8f64_identity:
|
||||
|
@ -152,8 +148,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
|
|||
define <8 x double> @combine_vpermt2var_8f64_movddup(<8 x double> %x0, <8 x double> %x1) {
|
||||
; X32-LABEL: combine_vpermt2var_8f64_movddup:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = <0,0,0,0,2,0,2,0,4,0,4,0,u,u,u,u>
|
||||
; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
|
||||
; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8f64_movddup:
|
||||
|
@ -167,10 +162,7 @@ define <8 x double> @combine_vpermt2var_8f64_movddup_load(<8 x double> *%p0, <8
|
|||
; X32-LABEL: combine_vpermt2var_8f64_movddup_load:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: vmovapd (%eax), %zmm1
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
|
||||
; X32-NEXT: vpermt2pd %zmm0, %zmm2, %zmm1
|
||||
; X32-NEXT: vmovapd %zmm1, %zmm0
|
||||
; X32-NEXT: vmovddup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8f64_movddup_load:
|
||||
|
@ -186,8 +178,7 @@ define <8 x double> @combine_vpermt2var_8f64_movddup_mask(<8 x double> %x0, <8 x
|
|||
; X32: # BB#0:
|
||||
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: kmovd %eax, %k1
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
|
||||
; X32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermt2var_8f64_movddup_mask:
|
||||
|
@ -868,10 +859,6 @@ define <32 x i16> @combine_pshufb_as_pshufw(<32 x i16> %a0) {
|
|||
define <8 x double> @combine_vpermi2var_8f64_identity(<8 x double> %x0, <8 x double> %x1) {
|
||||
; X32-LABEL: combine_vpermi2var_8f64_identity:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm2 = [7,0,6,0,5,0,4,0,3,0,2,0,1,0,0,0]
|
||||
; X32-NEXT: vpermi2pd %zmm1, %zmm0, %zmm2
|
||||
; X32-NEXT: vmovapd {{.*#+}} zmm0 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
|
||||
; X32-NEXT: vpermi2pd %zmm2, %zmm2, %zmm0
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: combine_vpermi2var_8f64_identity:
|
||||
|
|
Loading…
Reference in New Issue