forked from OSchip/llvm-project
[AVX-512] Fix masked VPERMI2PS isel when the index comes from a bitcast.
We need to bitcast the index operand to a floating-point type so that it matches the result type. Otherwise, the passthru part of the DAG will be a bitcast from the index's original type to the destination type, which makes it very difficult to match. The other option would be to add 5 sets of patterns for every other possible type. llvm-svn: 280696
This commit is contained in:
parent
cf9f1b8dfa
commit
4fa3b50fc3
|
@ -17869,19 +17869,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
|
|||
}
|
||||
case VPERM_3OP_MASKZ:
|
||||
case VPERM_3OP_MASK:{
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
// Src2 is the PassThru
|
||||
SDValue Src1 = Op.getOperand(1);
|
||||
SDValue Src2 = Op.getOperand(2);
|
||||
// PassThru needs to be the same type as the destination in order
|
||||
// to pattern match correctly.
|
||||
SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));
|
||||
SDValue Src3 = Op.getOperand(3);
|
||||
SDValue Mask = Op.getOperand(4);
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
SDValue PassThru = SDValue();
|
||||
|
||||
// set PassThru element
|
||||
if (IntrData->Type == VPERM_3OP_MASKZ)
|
||||
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
|
||||
else
|
||||
PassThru = DAG.getBitcast(VT, Src2);
|
||||
PassThru = Src2;
|
||||
|
||||
// Swap Src1 and Src2 in the node creation
|
||||
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
|
||||
|
|
|
@ -299,22 +299,6 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
|
|||
(vselect _.KRCWM:$mask, RHS, _.RC:$src1),
|
||||
vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
|
||||
|
||||
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
|
||||
// operand differs from the output VT. This requires a bitconvert on
|
||||
// the preserved vector going into the vselect.
|
||||
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
|
||||
X86VectorVTInfo InVT,
|
||||
dag Outs, dag NonTiedIns, string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
dag RHS> :
|
||||
AVX512_maskable_common<O, F, OutVT, Outs,
|
||||
!con((ins InVT.RC:$src1), NonTiedIns),
|
||||
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
|
||||
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
|
||||
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
|
||||
(vselect InVT.KRCWM:$mask, RHS,
|
||||
(bitconvert InVT.RC:$src1))>;
|
||||
|
||||
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
|
||||
dag Outs, dag NonTiedIns, string OpcodeStr,
|
||||
string AttSrcAsm, string IntelSrcAsm,
|
||||
|
@ -1181,83 +1165,76 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
|
|||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// -- VPERMI2 - 3 source operands form --
|
||||
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
|
||||
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
|
||||
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
|
||||
// The index operand in the pattern should really be an integer type. However,
|
||||
// if we do that and it happens to come from a bitcast, then it becomes
|
||||
// difficult to find the bitcast needed to convert the index to the
|
||||
// destination type for the passthru since it will be folded with the bitcast
|
||||
// of the index operand.
|
||||
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.RC:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
|
||||
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
|
||||
AVX5128IBase;
|
||||
|
||||
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
|
||||
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.MemOp:$src3),
|
||||
OpcodeStr, "$src3, $src2", "$src2, $src3",
|
||||
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
|
||||
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
|
||||
(_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
|
||||
EVEX_4V, AVX5128IBase;
|
||||
}
|
||||
}
|
||||
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
|
||||
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
|
||||
X86VectorVTInfo _> {
|
||||
let Constraints = "$src1 = $dst" in
|
||||
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
|
||||
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
|
||||
(ins _.RC:$src2, _.ScalarMemOp:$src3),
|
||||
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
|
||||
!strconcat("$src2, ${src3}", _.BroadcastStr ),
|
||||
(_.VT (X86VPermi2X IdxVT.RC:$src1,
|
||||
(_.VT (X86VPermi2X _.RC:$src1,
|
||||
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
|
||||
AVX5128IBase, EVEX_4V, EVEX_B;
|
||||
}
|
||||
|
||||
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
|
||||
AVX512VLVectorVTInfo VTInfo,
|
||||
AVX512VLVectorVTInfo ShuffleMask> {
|
||||
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
|
||||
ShuffleMask.info512>,
|
||||
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
|
||||
ShuffleMask.info512>, EVEX_V512;
|
||||
AVX512VLVectorVTInfo VTInfo> {
|
||||
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
|
||||
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
|
||||
let Predicates = [HasVLX] in {
|
||||
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
|
||||
ShuffleMask.info128>,
|
||||
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
|
||||
ShuffleMask.info128>, EVEX_V128;
|
||||
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
|
||||
ShuffleMask.info256>,
|
||||
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
|
||||
ShuffleMask.info256>, EVEX_V256;
|
||||
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
|
||||
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
|
||||
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
|
||||
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
|
||||
AVX512VLVectorVTInfo VTInfo,
|
||||
AVX512VLVectorVTInfo Idx,
|
||||
Predicate Prd> {
|
||||
let Predicates = [Prd] in
|
||||
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
|
||||
Idx.info512>, EVEX_V512;
|
||||
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
|
||||
let Predicates = [Prd, HasVLX] in {
|
||||
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
|
||||
Idx.info128>, EVEX_V128;
|
||||
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
|
||||
Idx.info256>, EVEX_V256;
|
||||
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
|
||||
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
|
||||
}
|
||||
}
|
||||
|
||||
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
|
||||
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
|
||||
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
|
||||
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
|
||||
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
|
||||
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
|
||||
avx512vl_i16_info, HasBWI>,
|
||||
VEX_W, EVEX_CD8<16, CD8VF>;
|
||||
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
|
||||
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
|
||||
avx512vl_i8_info, HasVBMI>,
|
||||
EVEX_CD8<8, CD8VF>;
|
||||
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
|
||||
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
|
||||
avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
|
||||
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
|
||||
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
// VPERMT2
|
||||
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
|
||||
|
|
|
@ -414,10 +414,12 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
|
|||
SDTCisSameSizeAs<0,2>,
|
||||
SDTCisSameAs<0,3>]>, []>;
|
||||
|
||||
// Even though the index operand should be integer, we need to make it match the
|
||||
// destination type so that we can pattern match the masked version where the
|
||||
// index is also the passthru operand.
|
||||
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
|
||||
SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
|
||||
SDTCisSameSizeAs<0,1>,
|
||||
SDTypeProfile<1, 3, [SDTCisVec<0>,
|
||||
SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>,
|
||||
SDTCisSameAs<0,3>]>, []>;
|
||||
|
||||
|
|
|
@ -2030,9 +2030,8 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %
|
|||
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9]
|
||||
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda]
|
||||
; CHECK-NEXT: vblendmps %xmm3, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc3]
|
||||
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
|
||||
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%x1cast = bitcast <2 x i64> %x1 to <4 x i32>
|
||||
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
|
||||
|
|
Loading…
Reference in New Issue