From 0980c9c6f155d8a06ad839d530636bf109aae34b Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Tue, 19 May 2020 11:20:33 +0100 Subject: [PATCH] [X86] Split masked integer vector stores into vXi32/vXi64 variants (PR45975). NFC This effectively splits the scheduling WriteVecMaskedStore(Y) classes into four different classes (one per each variant). The new VecMaskedStore scheduling classes are now correctly marked as 'unsupported' by the bdver2 and btver2 models. No functional change intended. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D80201 --- llvm/lib/Target/X86/X86InstrSSE.td | 18 +++++++++++------- llvm/lib/Target/X86/X86SchedBroadwell.td | 6 ++++-- llvm/lib/Target/X86/X86SchedHaswell.td | 6 ++++-- llvm/lib/Target/X86/X86SchedSandyBridge.td | 6 ++++-- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 6 ++++-- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 6 ++++-- llvm/lib/Target/X86/X86Schedule.td | 14 ++++++++++++-- llvm/lib/Target/X86/X86ScheduleAtom.td | 6 ++++-- llvm/lib/Target/X86/X86ScheduleBdVer2.td | 6 ++++-- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 6 ++++-- llvm/lib/Target/X86/X86ScheduleSLM.td | 6 ++++-- llvm/lib/Target/X86/X86ScheduleZnver1.td | 6 ++++-- llvm/lib/Target/X86/X86ScheduleZnver2.td | 6 ++++-- 13 files changed, 67 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 310c5459808a..0bc027916258 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7735,39 +7735,43 @@ let Predicates = [HasAVX2, NoVLX] in { // multiclass avx2_pmovmask { + Intrinsic IntSt128, Intrinsic IntSt256, + X86SchedWriteMaskMove schedX, + X86SchedWriteMaskMove schedY> { def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[WriteVecMaskedLoad]>; + VEX_4V, Sched<[schedX.RM]>; def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>; + VEX_4V, VEX_L, Sched<[schedY.RM]>; def mr : AVX28I<0x8e, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[WriteVecMaskedStore]>; + VEX_4V, Sched<[schedX.MR]>; def Ymr : AVX28I<0x8e, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>; + VEX_4V, VEX_L, Sched<[schedY.MR]>; } defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", int_x86_avx2_maskload_d, int_x86_avx2_maskload_d_256, int_x86_avx2_maskstore_d, - int_x86_avx2_maskstore_d_256>; + int_x86_avx2_maskstore_d_256, + WriteVecMaskMove32, WriteVecMaskMove32Y>; defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskload_q, int_x86_avx2_maskload_q_256, int_x86_avx2_maskstore_q, - int_x86_avx2_maskstore_q_256>, VEX_W; + int_x86_avx2_maskstore_q_256, + WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W; multiclass maskmov_lowering { diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 4356045a5498..0774de39cc01 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -352,8 +352,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 8b460e5def73..c973de89b256 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -392,8 +392,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index c4a8cc4c916f..149d0bd4d24e 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -367,8 +367,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index c9968fd58280..51863e88e91e 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -343,8 +343,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 256a1f61da1b..7e93f5a0f0cc 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -343,8 +343,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index b09486bf069c..f204d6622119 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -341,8 +341,10 @@ def WriteVecStoreX : SchedWrite; def WriteVecStoreY : SchedWrite; def WriteVecStoreNT : SchedWrite; def WriteVecStoreNTY : SchedWrite; -def WriteVecMaskedStore : SchedWrite; -def WriteVecMaskedStoreY : SchedWrite; +def WriteVecMaskedStore32 : SchedWrite; +def WriteVecMaskedStore64 : SchedWrite; +def WriteVecMaskedStore32Y : SchedWrite; +def WriteVecMaskedStore64Y : SchedWrite; def WriteVecMove : SchedWrite; def WriteVecMoveX : SchedWrite; def WriteVecMoveY : SchedWrite; @@ -550,6 +552,14 @@ def WriteFMaskMove32Y : X86SchedWriteMaskMove; def WriteFMaskMove64Y : X86SchedWriteMaskMove; +def WriteVecMaskMove32 + : X86SchedWriteMaskMove; +def WriteVecMaskMove64 + : X86SchedWriteMaskMove; +def WriteVecMaskMove32Y + : X86SchedWriteMaskMove; +def WriteVecMaskMove64Y + : X86SchedWriteMaskMove; // Vector width wrappers. def SchedWriteFAdd diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index d63cca018c6a..b90baf6c16b1 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -369,8 +369,10 @@ def : WriteRes; defm : X86WriteResUnsupported; def : WriteRes; defm : X86WriteResUnsupported; -def : WriteRes; -defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td index 6f8524b94dae..0a201bc74a48 100644 --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -1093,8 +1093,10 @@ def : InstRW<[PdWriteVMOVDQUYmr], (instrs VMOVDQUYmr)>; defm : PdWriteRes; defm : PdWriteRes; -defm : PdWriteRes; -defm : PdWriteRes; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : PdWriteRes; defm : PdWriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 23967f320544..13b6eed5126d 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -670,8 +670,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; +defm : X86WriteResUnsupported; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index bedbbbc5f978..3d53ef104ed6 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -311,8 +311,10 @@ def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; -def : WriteRes; -def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; def : WriteRes; def : WriteRes; def : WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 089e7aafaf22..fe09d6f85221 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -388,8 +388,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td index 1517ae7d46b5..48da0d6329b1 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver2.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td @@ -370,8 +370,10 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes;