[X86] Split masked integer vector stores into vXi32/vXi64 variants (PR45975). NFC

This effectively splits the scheduling WriteVecMaskedStore(Y) classes
into four different classes (one per each variant).

The new VecMaskedStore scheduling classes are now correctly marked as
'unsupported' by the bdver2 and btver2 models.

No functional change intended.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D80201
This commit is contained in:
Andrea Di Biagio 2020-05-19 11:20:33 +01:00
parent 7cefd1b4cd
commit 0980c9c6f1
13 changed files with 67 additions and 31 deletions

View File

@ -7735,39 +7735,43 @@ let Predicates = [HasAVX2, NoVLX] in {
//
multiclass avx2_pmovmask<string OpcodeStr,
Intrinsic IntLd128, Intrinsic IntLd256,
Intrinsic IntSt128, Intrinsic IntSt256> {
Intrinsic IntSt128, Intrinsic IntSt256,
X86SchedWriteMaskMove schedX,
X86SchedWriteMaskMove schedY> {
def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>,
VEX_4V, Sched<[WriteVecMaskedLoad]>;
VEX_4V, Sched<[schedX.RM]>;
def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, i256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>;
VEX_4V, VEX_L, Sched<[schedY.RM]>;
def mr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>,
VEX_4V, Sched<[WriteVecMaskedStore]>;
VEX_4V, Sched<[schedX.MR]>;
def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>;
VEX_4V, VEX_L, Sched<[schedY.MR]>;
}
defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
int_x86_avx2_maskload_d,
int_x86_avx2_maskload_d_256,
int_x86_avx2_maskstore_d,
int_x86_avx2_maskstore_d_256>;
int_x86_avx2_maskstore_d_256,
WriteVecMaskMove32, WriteVecMaskMove32Y>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskload_q,
int_x86_avx2_maskload_q_256,
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256>, VEX_W;
int_x86_avx2_maskstore_q_256,
WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT> {

View File

@ -352,8 +352,10 @@ defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;

View File

@ -392,8 +392,10 @@ defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>;

View File

@ -367,8 +367,10 @@ defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>;

View File

@ -343,8 +343,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>;

View File

@ -343,8 +343,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;

View File

@ -341,8 +341,10 @@ def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
def WriteVecStoreNT : SchedWrite;
def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMaskedStore32 : SchedWrite;
def WriteVecMaskedStore64 : SchedWrite;
def WriteVecMaskedStore32Y : SchedWrite;
def WriteVecMaskedStore64Y : SchedWrite;
def WriteVecMove : SchedWrite;
def WriteVecMoveX : SchedWrite;
def WriteVecMoveY : SchedWrite;
@ -550,6 +552,14 @@ def WriteFMaskMove32Y
: X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>;
def WriteFMaskMove64Y
: X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>;
def WriteVecMaskMove32
: X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore32>;
def WriteVecMaskMove64
: X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore64>;
def WriteVecMaskMove32Y
: X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore32Y>;
def WriteVecMaskMove64Y
: X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore64Y>;
// Vector width wrappers.
def SchedWriteFAdd

View File

@ -369,8 +369,10 @@ def : WriteRes<WriteVecStoreX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreY>;
def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecStoreNTY>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteVecMaskedStoreY>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
def : WriteRes<WriteVecMove, [AtomPort0]>;
def : WriteRes<WriteVecMoveX, [AtomPort01]>;

View File

@ -1093,8 +1093,10 @@ def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>;
defm : PdWriteRes<WriteVecStoreNT, [PdStore, PdFPU1, PdFPSTO], 2>;
defm : PdWriteRes<WriteVecStoreNTY, [PdStore, PdFPU1, PdFPSTO], 2, [2, 2, 2], 4>;
defm : PdWriteRes<WriteVecMaskedStore, [PdStore, PdFPU01, PdFPMAL], 6, [1, 1, 4]>;
defm : PdWriteRes<WriteVecMaskedStoreY, [PdStore, PdFPU01, PdFPMAL], 6, [2, 2, 4], 2>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : PdWriteRes<WriteVecMove, [PdFPU01, PdFPMAL], 2>;
defm : PdWriteRes<WriteVecMoveX, [PdFPU01, PdFPMAL], 1, [1, 2]>;

View File

@ -670,8 +670,10 @@ defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1],
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [2, 2, 2], 2>;
defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64>;
defm : X86WriteResUnsupported<WriteVecMaskedStore32Y>;
defm : X86WriteResUnsupported<WriteVecMaskedStore64Y>;
defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>;
defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>;

View File

@ -311,8 +311,10 @@ def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore32, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore32Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore64, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore64Y, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>;

View File

@ -388,8 +388,10 @@ defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;

View File

@ -370,8 +370,10 @@ defm : X86WriteRes<WriteVecStoreX, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNT, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecStoreNTY, [Zn2AGU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [Zn2AGU,Zn2FPU01], 5, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore32, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore32Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedStore64, [Zn2AGU,Zn2FPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore64Y, [Zn2AGU,Zn2FPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [Zn2FPU], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [Zn2FPU], 2, [1], 2>;