[AVX-512] Fix masked VPERMI2PS isel when the index comes from a bitcast.

We need to bitcast the index operand to a floating point type so that it matches the result type. If not then the passthru part of the DAG will be a bitcast from the index's original type to the destination type. This makes it very difficult to match. The other option would be to add 5 sets of patterns for every other possible type.

llvm-svn: 280696
This commit is contained in:
Craig Topper 2016-09-06 06:56:59 +00:00
parent cf9f1b8dfa
commit 4fa3b50fc3
4 changed files with 41 additions and 61 deletions

View File

@ -17869,19 +17869,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:{
MVT VT = Op.getSimpleValueType();
// Src2 is the PassThru
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
// PassThru needs to be the same type as the destination in order
// to pattern match correctly.
SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// set PassThru element
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
PassThru = DAG.getBitcast(VT, Src2);
PassThru = Src2;
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,

View File

@ -299,22 +299,6 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
(vselect _.KRCWM:$mask, RHS, _.RC:$src1),
vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
X86VectorVTInfo InVT,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS> :
AVX512_maskable_common<O, F, OutVT, Outs,
!con((ins InVT.RC:$src1), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(vselect InVT.KRCWM:$mask, RHS,
(bitconvert InVT.RC:$src1))>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
@ -1181,83 +1165,76 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
let Constraints = "$src1 = $dst" in {
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
// The index operand in the pattern should really be an integer type. However,
// if we do that and it happens to come from a bitcast, then it becomes
// difficult to find the bitcast needed to convert the index to the
// destination type for the passthru since it will be folded with the bitcast
// of the index operand.
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
AVX5128IBase;
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
(_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
(_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
X86VectorVTInfo _> {
let Constraints = "$src1 = $dst" in
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermi2X IdxVT.RC:$src1,
(_.VT (X86VPermi2X _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo ShuffleMask> {
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
ShuffleMask.info512>, EVEX_V512;
AVX512VLVectorVTInfo VTInfo> {
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
ShuffleMask.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
ShuffleMask.info256>, EVEX_V256;
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
Idx.info512>, EVEX_V512;
defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
Idx.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
Idx.info256>, EVEX_V256;
defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,

View File

@ -414,10 +414,12 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
// Even though the index operand should be integer, we need to make it match the
// destination type so that we can pattern match the masked version where the
// index is also the passthru operand.
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
SDTCisSameSizeAs<0,1>,
SDTypeProfile<1, 3, [SDTCisVec<0>,
SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisSameAs<0,3>]>, []>;

View File

@ -2030,9 +2030,8 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9]
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda]
; CHECK-NEXT: vblendmps %xmm3, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc3]
; CHECK-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
; CHECK-NEXT: vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]
; CHECK-NEXT: retq ## encoding: [0xc3]
%x1cast = bitcast <2 x i64> %x1 to <4 x i32>
%res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)