[X86][AVX512] Tag vshift/vpermv/pshufd/pshufb instructions scheduler classes

llvm-svn: 319540
This commit is contained in:
Simon Pilgrim 2017-12-01 13:25:54 +00:00
parent 9c13c8b6ec
commit 2dc4ff1cde
6 changed files with 623 additions and 585 deletions

View File

@ -5123,135 +5123,148 @@ defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
string OpcodeStr, SDNode OpNode, OpndItins itins,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rr>;
itins.rr>, Sched<[itins.Sched]>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>;
itins.rm>, Sched<[itins.Sched.Folded]>;
}
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
string OpcodeStr, SDNode OpNode, OpndItins itins,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>, EVEX_B;
itins.rm>, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
X86VectorVTInfo _> {
// src2 is always 128-bit
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, i128mem:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
EVEX_4V;
itins.rm>, AVX512BIBase,
EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
AVX512VLVectorVTInfo VTInfo, Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
VTInfo.info512>, EVEX_V512,
EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
VTInfo.info256>, EVEX_V256,
EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
VTInfo.info128>, EVEX_V128,
EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
}
}
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
string OpcodeStr, SDNode OpNode> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
avx512vl_i16_info, HasBWI>;
string OpcodeStr, SDNode OpNode,
OpndItins itins> {
defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
bc_v4i32, avx512vl_i32_info, HasAVX512>;
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
bc_v2i64, avx512vl_i16_info, HasBWI>;
}
multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
string OpcodeStr, SDNode OpNode,
OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
VTInfo.info256>, EVEX_V256;
defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
itins, VTInfo.info128>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
VTInfo.info128>, EVEX_V128;
}
}
multiclass avx512_shift_rmi_w<bits<8> opcw,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
string OpcodeStr, SDNode OpNode,
OpndItins itins> {
let Predicates = [HasBWI] in
defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v32i16_info>, EVEX_V512, VEX_WIG;
itins, v32i16_info>, EVEX_V512, VEX_WIG;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v16i16x_info>, EVEX_V256, VEX_WIG;
itins, v16i16x_info>, EVEX_V256, VEX_WIG;
defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
v8i16x_info>, EVEX_V128, VEX_WIG;
itins, v8i16x_info>, EVEX_V128, VEX_WIG;
}
}
multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode> {
string OpcodeStr, SDNode OpNode, OpndItins itins> {
defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V;
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
SSE_INTSHIFT_P>,
avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
SSE_INTSHIFT_P>,
avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
SSE_INTSHIFT_P>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V;
defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
// Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
@ -5284,25 +5297,27 @@ let Predicates = [HasAVX512, NoVLX] in {
// Variable Bit Shifts
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
itins.rr>, AVX5128IBase, EVEX_4V,
Sched<[itins.Sched]>;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1,
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
itins.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
@ -5310,29 +5325,30 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
itins.rm>, AVX5128IBase, EVEX_B,
EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
OpndItins itins, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
}
}
multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
SDNode OpNode, OpndItins itins> {
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
avx512vl_i64_info>, VEX_W;
}
@ -5358,30 +5374,30 @@ multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
}
}
multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
SDNode OpNode, OpndItins itins> {
let Predicates = [HasBWI] in
defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
EVEX_V512, VEX_W;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
EVEX_V256, VEX_W;
defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
EVEX_V128, VEX_W;
}
}
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
avx512_var_shift_w<0x12, "vpsllvw", shl>;
defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
avx512_var_shift_w<0x11, "vpsravw", sra>;
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
avx512_var_shift_w<0x10, "vpsrlvw", srl>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;
defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
@ -5559,64 +5575,64 @@ let Predicates = [HasAVX512, NoVLX] in {
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//
multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo _> {
OpndItins itins, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
}
multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTInfo> {
OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
let Predicates = [HasAVX512] in
defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>,
itins, VTInfo.info512>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info512>, EVEX_V512;
itins, VTInfo.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in
defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>,
itins, VTInfo.info256>,
avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
VTInfo.info256>, EVEX_V256;
itins, VTInfo.info256>, EVEX_V256;
}
multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
Predicate prd, SDNode OpNode,
AVX512VLVectorVTInfo _> {
OpndItins itins, AVX512VLVectorVTInfo _> {
let Predicates = [prd] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
EVEX_V512 ;
let Predicates = [HasVLX, prd] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
EVEX_V256 ;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
EVEX_V128 ;
}
}
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
avx512vl_i16_info>, VEX_W;
AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
avx512vl_i8_info>;
AVX2_PERMV_I, avx512vl_i8_info>;
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
avx512vl_i32_info>;
AVX2_PERMV_I, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
avx512vl_i64_info>, VEX_W;
AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
avx512vl_f32_info>;
AVX2_PERMV_F, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
avx512vl_f64_info>, VEX_W;
AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
X86VPermi, avx512vl_i64_info>,
X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, avx512vl_f64_info>,
X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
@ -5670,7 +5686,7 @@ multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
X86VPermilpi, _>,
X86VPermilpi, AVX_VPERMILV, _>,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
@ -5686,24 +5702,25 @@ defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
//===----------------------------------------------------------------------===//
defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
X86PShufd, avx512vl_i32_info>,
X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
X86PShufhw>, EVEX, AVX512XSIi8Base;
X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
X86PShuflw>, EVEX, AVX512XDIi8Base;
X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
let Predicates = [HasBWI] in
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
let Predicates = [HasVLX, HasBWI] in {
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
}
}
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>, VEX_WIG;
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions

View File

@ -139,6 +139,11 @@ def SSE_INTMUL_ITINS_P : OpndItins<
IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;
// FIXME: Merge SSE_INTSHIFT_P + SSE_INTSHIFT_ITINS_P.
def SSE_INTSHIFT_P : OpndItins<
IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM
>;
def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;
@ -3891,9 +3896,14 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
let Sched = WriteShuffle in
def SSE_PSHUF : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;
let ExeDomain = SSEPackedInt in {
multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
SDNode OpNode, Predicate prd> {
SDNode OpNode, OpndItins itins, Predicate prd> {
let Predicates = [HasAVX, prd] in {
def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
@ -3901,15 +3911,15 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>, VEX_WIG;
itins.rr>, VEX, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
Sched<[WriteShuffleLd]>, VEX_WIG;
(i8 imm:$src2))))], itins.rm>, VEX,
Sched<[itins.Sched.Folded]>, VEX_WIG;
}
let Predicates = [HasAVX2, prd] in {
@ -3919,15 +3929,15 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>, VEX_WIG;
itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>, VEX_WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
Sched<[WriteShuffleLd]>, VEX_WIG;
(i8 imm:$src2))))], itins.rm>, VEX, VEX_L,
Sched<[itins.Sched.Folded]>, VEX_WIG;
}
let Predicates = [UseSSE2] in {
@ -3937,23 +3947,24 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
itins.rr>, Sched<[itins.Sched]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
(i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
Sched<[WriteShuffleLd, ReadAfterLd]>;
(i8 imm:$src2))))], itins.rm>,
Sched<[itins.Sched.Folded]>;
}
}
} // ExeDomain = SSEPackedInt
defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, NoVLX>, PD;
defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw,
defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, SSE_PSHUF,
NoVLX>, PD;
defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, SSE_PSHUF,
NoVLX_Or_NoBWI>, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw,
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, SSE_PSHUF,
NoVLX_Or_NoBWI>, XD;
//===---------------------------------------------------------------------===//
@ -8086,6 +8097,16 @@ let Predicates = [HasAVX1Only] in {
// VPERM - Permute instructions
//
let Sched = WriteFShuffle256 in
def AVX2_PERMV_F : OpndItins<
IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
let Sched = WriteShuffle256 in
def AVX2_PERMV_I : OpndItins<
IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;
multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
ValueType OpVT, X86FoldableSchedWrite Sched,
X86MemOperand memOp> {

View File

@ -16,8 +16,8 @@ define void @f_fu(float* %ret, float* %aa, float %b) {
; CHECK-NEXT: movw $-21846, %ax ## imm = 0xAAAA
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1 {%k1}
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: retq

View File

@ -4353,7 +4353,7 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
; GENERIC-LABEL: trunc_16i32_to_16i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0
; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
@ -4547,7 +4547,7 @@ define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
; GENERIC-LABEL: test21:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2
; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
; GENERIC-NEXT: vpmovb2m %zmm2, %k1
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; GENERIC-NEXT: kshiftrq $32, %k1, %k1
@ -7703,7 +7703,7 @@ define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; GENERIC-LABEL: test_build_vec_v64i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_build_vec_v64i1:
@ -8035,7 +8035,7 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
; GENERIC-LABEL: store_32i1_1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0
; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovw2m %zmm0, %k0
; GENERIC-NEXT: kmovd %k0, (%rdi)
; GENERIC-NEXT: vzeroupper
@ -8058,7 +8058,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; GENERIC-LABEL: store_64i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0
; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0
; GENERIC-NEXT: kmovq %k0, (%rdi)
; GENERIC-NEXT: vzeroupper

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32,
define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlb $7, %k0, %k1
; CHECK-NEXT: kshiftrb $7, %k1, %k1
@ -15,7 +16,6 @@ define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0,
; CHECK-NEXT: kmovw %k1, %ecx
; CHECK-NEXT: vmovd %ecx, %xmm2
; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpsllq $63, %xmm2, %xmm2
; CHECK-NEXT: vpsraq $63, %zmm2, %zmm2
; CHECK-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm1